## Import Dependencies

In [None]:
import ollama
import psycopg2
import json
import pandas as pd
from datetime import datetime

## Configuration

In [None]:
# Connection settings for the PostgreSQL database on localhost that holds
# the embedding tables and the embedding_registry table queried below.
POSTGRES_CONFIG = dict(
    host='localhost',
    port=5432,
    database='rag_db',
    user='postgres',
    password='postgres',
)

# Model identifiers. EMBEDDING_MODEL is the model handed to ollama.embed when
# a query is embedded; LANGUAGE_MODEL is defined here for later use.
EMBEDDING_MODEL = 'hf.co/CompendiumLabs/bge-base-en-v1.5-gguf'
LANGUAGE_MODEL = 'hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF'

## Part 1: Discover Available Embeddings

**What's in the registry?** Query the embedding_registry table to see what models and embeddings you have available.

In [None]:
def list_available_embeddings(db_connection):
    """List every row of the embedding_registry table, newest first.

    Args:
        db_connection: Open database connection handed to pandas.read_sql.

    Returns:
        DataFrame with the columns model_alias, model_name, dimension,
        embedding_count, chunk_source_dataset, chunk_size_config,
        created_at and last_accessed, ordered by created_at descending.
    """
    registry_sql = '''
        SELECT 
            model_alias,
            model_name,
            dimension,
            embedding_count,
            chunk_source_dataset,
            chunk_size_config,
            created_at,
            last_accessed
        FROM embedding_registry
        ORDER BY created_at DESC
    '''
    registry_frame = pd.read_sql(registry_sql, db_connection)
    return registry_frame

# Connect and discover embeddings.
# `conn` is bound to None first so that it is always a defined name: if the
# connection attempt below fails, later cells see None instead of raising
# NameError.
conn = None
try:
    conn = psycopg2.connect(
        host=POSTGRES_CONFIG['host'],
        port=POSTGRES_CONFIG['port'],
        database=POSTGRES_CONFIG['database'],
        user=POSTGRES_CONFIG['user'],
        password=POSTGRES_CONFIG['password']
    )
    print(f'✓ Connected to PostgreSQL')
    
    # Query registry
    available = list_available_embeddings(conn)
    
    if available.empty:
        print("\n⚠️  No embeddings found in registry!")
        print("\nTo populate the registry, run: foundation/02-rag-postgresql-persistent.ipynb")
    else:
        print("\n=== Available Embeddings ===")
        print(available.to_string(index=False))
        
except psycopg2.OperationalError as e:
    # Raised by psycopg2.connect when the server cannot be reached or refuses
    # the connection; `conn` stays None in that case.
    print(f'✗ Failed to connect: {e}')
    print("Make sure PostgreSQL is running")

## Part 2: Load Embeddings from Registry

**Now that you know what embeddings exist, load them!** This is instant (no regeneration needed).

In [None]:
class PostgreSQLVectorDB:
    """Read-only wrapper around one embeddings table in PostgreSQL.

    The constructor opens its own psycopg2 connection. The instance then
    offers a row count, a pgvector similarity search, and an explicit
    close().
    """

    def __init__(self, config, table_name):
        """Connect using the host/port/database/user/password keys of *config*."""
        self.config = config
        self.table_name = table_name
        connect_kwargs = {
            key: config[key]
            for key in ('host', 'port', 'database', 'user', 'password')
        }
        self.conn = psycopg2.connect(**connect_kwargs)
        print(f'✓ Connected to table: {table_name}')

    def get_chunk_count(self):
        """Return the number of rows in the embeddings table."""
        count_sql = f'SELECT COUNT(*) FROM {self.table_name}'
        with self.conn.cursor() as cursor:
            cursor.execute(count_sql)
            first_row = cursor.fetchone()
            return first_row[0]

    def similarity_search(self, query_embedding, top_n=3):
        """Return the *top_n* chunks closest to *query_embedding*.

        Rows are ordered by pgvector's `<=>` distance to the query vector;
        each result is a (chunk_text, similarity) tuple where similarity is
        1 minus that distance.
        """
        search_sql = f'''
                SELECT chunk_text, 
                       1 - (embedding <=> %s::vector) as similarity
                FROM {self.table_name}
                ORDER BY embedding <=> %s::vector
                LIMIT %s
            '''
        # The query vector is bound twice: once for the score, once for ORDER BY.
        bind_values = (query_embedding, query_embedding, top_n)
        with self.conn.cursor() as cursor:
            cursor.execute(search_sql, bind_values)
            rows = cursor.fetchall()
        return [(text, similarity) for text, similarity in rows]

    def close(self):
        """Close the underlying connection if there is one."""
        if not self.conn:
            return
        self.conn.close()

# Load embeddings from registry (instant - no regeneration)
EMBEDDING_MODEL_ALIAS = 'bge_base_en_v1_5'  # Match what foundation/02 registered
# Table names are derived from the alias with any '.' replaced by '_'.
TABLE_NAME = f'embeddings_{EMBEDDING_MODEL_ALIAS.replace(".", "_")}'

# Bind the name up front so it is always defined: if the constructor below
# raises, later cells see None instead of hitting a NameError.
vector_db = None
try:
    vector_db = PostgreSQLVectorDB(POSTGRES_CONFIG, TABLE_NAME)
    count = vector_db.get_chunk_count()
    
    print(f'\n✓ Loaded {count:,} embeddings from PostgreSQL (instant! no regeneration needed)')
    print(f'  Table: {TABLE_NAME}')
    print(f'  Time: <1 second (vs. 50+ minutes to regenerate)')
    
except Exception as e:
    # Covers both a failed connection and a failed COUNT(*) query.
    print(f'✗ Could not load embeddings: {e}')
    print(f'\nMake sure foundation/02-rag-postgresql-persistent.ipynb has been run')

## Part 3: Retrieve Using Loaded Embeddings

**Now you can perform RAG operations instantly using the pre-generated embeddings!**

In [None]:
def retrieve(query, top_n=3):
    """Embed *query* and return its *top_n* nearest stored chunks.

    Only the query is embedded here (via ollama.embed with EMBEDDING_MODEL);
    the search itself runs against the module-level `vector_db`.

    Returns:
        Whatever vector_db.similarity_search returns for the query vector.
    """
    embed_response = ollama.embed(model=EMBEDDING_MODEL, input=query)
    # A single input was sent, so the first embedding is the one for `query`.
    query_vector = embed_response['embeddings'][0]
    return vector_db.similarity_search(query_vector, top_n=top_n)

# Smoke-test retrieval against the embeddings loaded above
test_query = "What is photosynthesis?"
print(f"Query: '{test_query}'\n")

results = retrieve(test_query, top_n=3)
print("Retrieved chunks:")
for rank, (chunk, score) in enumerate(results, start=1):
    # The chunk's first line is shown as its title; the preview is the first
    # 200 characters flattened onto one line.
    first_line = chunk.partition('\n')[0]
    snippet = chunk[:200].replace('\n', ' ') + '...'
    print(f"\n  [{rank}] Similarity: {score:.4f}")
    print(f"      {first_line}")
    print(f"      {snippet}")

## Part 4: The Load-or-Generate Pattern

**Best Practice:** Check registry first, then decide to load or generate.

This is the **core pattern** that enables fast iteration in advanced notebooks. Instead of regenerating embeddings every time you want to experiment with a new technique, you load them from the registry (instant) and focus on your technique implementation.

**Time Savings Example:**
- Without registry: 50 min (embedding generation) + 5 min (technique) = 55 min per experiment
- With registry: <1 sec (load) + 5 min (technique) = 5 min per experiment
- Result: 11× faster iteration!

### Why This Pattern Matters

When you move to advanced technique notebooks, you'll want to experiment with:
- Different reranking strategies
- Query expansion techniques
- Hybrid search methods
- Prompt engineering variations

With the load-or-generate pattern, each experiment takes 5 minutes instead of 55 minutes. This fundamentally changes how fast you can learn and iterate.

### The Three Decisions

When loading embeddings, the code makes three intelligent decisions:

1. **Check Registry** - Are embeddings already stored for this model?
2. **Display Time Savings** - Show user: "Found X embeddings, loading in <1 sec vs 50 min"
3. **Interactive Choice** - Prompt: "Choice [l/r/c]" (load existing embeddings, which is recommended; regenerate; or cancel)

You can also programmatically control this with the `preserve_existing` parameter:
- `None`: Prompt user interactively (default, safest for learning)
- `True`: Always load (fastest, best for experiments)
- `False`: Skip loading and print instructions for regenerating with foundation/02 (regeneration itself is not implemented in this notebook)

In [None]:
def list_available_embeddings(db_connection):
    """Return all rows of embedding_registry, or an empty frame on failure.

    Any exception raised while querying is reported with a printed note and
    turned into an empty DataFrame, so callers can simply test `.empty`.

    Args:
        db_connection: Open database connection handed to pandas.read_sql.

    Returns:
        DataFrame with the columns model_alias, model_name, dimension,
        embedding_count, chunk_source_dataset, chunk_size_config,
        created_at and last_accessed (newest first), or an empty
        DataFrame if the query could not be run.
    """
    registry_sql = '''
            SELECT
                model_alias,
                model_name,
                dimension,
                embedding_count,
                chunk_source_dataset,
                chunk_size_config,
                created_at,
                last_accessed
            FROM embedding_registry
            ORDER BY created_at DESC
        '''
    try:
        registry_frame = pd.read_sql(registry_sql, db_connection)
    except Exception as e:
        print(f"Note: Could not query registry: {e}")
        print("This is normal if foundation/00-setup-postgres-schema.ipynb hasn't been run yet.")
        return pd.DataFrame()
    return registry_frame


def get_embedding_metadata(db_connection, model_alias):
    """Fetch the registry metadata for one embedding model.

    Looks up the row of embedding_registry whose model_alias matches and
    returns its dimension, counts, chunk configuration, timestamps and
    free-form metadata.

    Args:
        db_connection: PostgreSQL connection
        model_alias: The model alias (e.g., 'bge_base_en_v1_5')

    Returns:
        Dict with the keys dimension, embedding_count, chunk_source_dataset,
        chunk_size_config, created_at, last_accessed and metadata_json
        (always a decoded Python object, {} when the column is empty), or
        None if no row matches or the lookup fails.
    """
    try:
        with db_connection.cursor() as cur:
            cur.execute('''
                SELECT
                    dimension,
                    embedding_count,
                    chunk_source_dataset,
                    chunk_size_config,
                    created_at,
                    last_accessed,
                    metadata_json
                FROM embedding_registry
                WHERE model_alias = %s
            ''', (model_alias,))

            result = cur.fetchone()

        if not result:
            return None

        raw_metadata = result[6]
        if not raw_metadata:
            metadata = {}
        elif isinstance(raw_metadata, (str, bytes, bytearray)):
            # Stored as text: decode it ourselves.
            metadata = json.loads(raw_metadata)
        else:
            # psycopg2 already converts json/jsonb columns to Python objects;
            # passing such a value to json.loads would raise TypeError and
            # make an existing row look like "not found".
            metadata = raw_metadata

        return {
            'dimension': result[0],
            'embedding_count': result[1],
            'chunk_source_dataset': result[2],
            'chunk_size_config': result[3],
            'created_at': result[4],
            'last_accessed': result[5],
            'metadata_json': metadata,
        }
    except Exception as e:
        # Best-effort lookup: report the problem and signal "no metadata".
        print(f"Could not fetch metadata: {e}")
        return None


def load_or_generate(db_connection, embedding_model_alias, preserve_existing=None):
    """Load a model's stored embeddings if the registry lists them.

    This notebook never generates embeddings itself: when none are
    registered, or when regeneration is requested, the function prints
    instructions pointing at foundation/02 and returns None.

    Flow:
      1. Look up ``embedding_model_alias`` in ``embedding_registry``.
      2. If no row exists, print guidance and return None.
      3. If a row exists, print a summary and act on ``preserve_existing``.
      4. When loading, open a PostgreSQLVectorDB (using POSTGRES_CONFIG) on
         the table ``embeddings_<alias>`` with any '.' in the alias
         replaced by '_'.

    Args:
        db_connection: PostgreSQL connection object used for the registry
                       lookup.
        embedding_model_alias: Model identifier (e.g., 'bge_base_en_v1_5')
                              Should match what was used in foundation/02
        preserve_existing: Decision flag applied when embeddings exist
                          - None: prompt the user (load / regenerate / cancel)
                          - False: do not load; print how to regenerate
                          - any other value (e.g. True): load without asking

    Returns:
        PostgreSQLVectorDB instance ready for use, or None if the registry
        could not be queried, no embeddings are registered, regeneration
        was requested, or loading failed.

    Raises:
        ValueError: If the user picks "cancel" at the interactive prompt.
    """

    # === Step 1: Check if embeddings exist in registry ===
    print(f"Checking for embeddings: '{embedding_model_alias}'...")

    try:
        with db_connection.cursor() as cur:
            cur.execute('''
                SELECT id, dimension, embedding_count, created_at, metadata_json
                FROM embedding_registry
                WHERE model_alias = %s
            ''', (embedding_model_alias,))
            registry_entry = cur.fetchone()
    except Exception as e:
        print(f"Could not query registry: {e}")
        print("\nMake sure foundation/00-setup-postgres-schema.ipynb has been run.")
        return None

    # === Step 2: Nothing registered for this alias ===
    if not registry_entry:
        print(f"\n{'='*60}")
        print(f"✗ NO EMBEDDINGS FOUND")
        print(f"{'='*60}")
        print(f"Model:  {embedding_model_alias}")
        print(f"\nTo create embeddings, run:")
        print(f"  foundation/02-rag-postgresql-persistent.ipynb")
        print(f"\nThen come back here and re-run this cell.")
        print(f"{'='*60}\n")

        return None

    # Only dimension, count and creation time are shown; id and
    # metadata_json are fetched but not needed here.
    _reg_id, dimension, embedding_count, created_at, _metadata_json = registry_entry

    # Display what we found
    print(f"\n{'='*60}")
    print(f"✓ FOUND EXISTING EMBEDDINGS")
    print(f"{'='*60}")
    print(f"Model:        {embedding_model_alias}")
    print(f"Count:        {embedding_count:,} embeddings")
    print(f"Dimension:    {dimension}")
    print(f"Created:      {created_at}")
    print(f"\nTIME SAVINGS:")
    print(f"  Loading embeddings: <1 second")
    print(f"  Regenerating:       ~50+ minutes")
    print(f"  ➜ You save 50+ minutes by loading!")
    print(f"{'='*60}\n")

    # === Step 3: Decide whether to load or regenerate ===
    if preserve_existing is False:
        # Caller explicitly asked for regeneration, which lives in foundation/02
        print("⚠️  regeneration mode (preserve_existing=False)")
        print("Not implemented in this notebook.")
        print("To regenerate: run foundation/02-rag-postgresql-persistent.ipynb")
        return None

    if preserve_existing is None:
        # Interactive prompt - safest for learning
        print("What would you like to do?\n")
        print("  [l] Load existing embeddings (recommended)")
        print("  [r] Regenerate from scratch")
        print("  [c] Cancel\n")

        while True:
            response = input("Choice [l/r/c]: ").lower().strip()

            if response in ['l', 'load']:
                print("\n✓ Loading existing embeddings...")
                break
            elif response in ['r', 'regenerate']:
                print("\nTo regenerate embeddings:")
                print("  Run foundation/02-rag-postgresql-persistent.ipynb")
                print("  Set PRESERVE_EXISTING_EMBEDDINGS = False")
                return None
            elif response in ['c', 'cancel']:
                raise ValueError("User cancelled operation")
            else:
                print('Invalid choice. Enter "l", "r", or "c"')

    # === Step 4: Load embeddings ===
    db_instance = None
    try:
        table_name = f'embeddings_{embedding_model_alias.replace(".", "_")}'

        # PostgreSQLVectorDB only takes (config, table_name). Passing any
        # extra keyword raises TypeError, which the handler below would
        # report as a load failure.
        db_instance = PostgreSQLVectorDB(
            config=POSTGRES_CONFIG,
            table_name=table_name,
        )

        count = db_instance.get_chunk_count()
        print(f"\n{'='*60}")
        print(f"✓ LOADED SUCCESSFULLY")
        print(f"{'='*60}")
        print(f"Embeddings loaded: {count:,}")
        print(f"Table:            {table_name}")
        print(f"Status:           Ready for retrieval operations")
        print(f"{'='*60}\n")

        return db_instance

    except Exception as e:
        # Do not leak the connection if it opened but the table query failed.
        if db_instance is not None:
            db_instance.close()
        print(f"\n✗ Error loading embeddings: {e}")
        print(f"\nTroubleshooting:")
        print(f"  1. Verify PostgreSQL is running")
        print(f"  2. Check POSTGRES_CONFIG settings (above)")
        print(f"  3. Run foundation/02 to generate embeddings first")
        return None


# === DEMONSTRATION: Use the load-or-generate pattern ===

print("Demonstrating load-or-generate pattern...\n")

# Step 1: show what the registry currently holds
print("Step 1: Checking registry for available embeddings...\n")
available = list_available_embeddings(conn)

if not available.empty:
    print("Available embeddings:")
    print(available.to_string(index=False))
    print()
else:
    print("⚠️  No embeddings found in registry yet.")
    print("Run foundation/02-rag-postgresql-persistent.ipynb first.\n")

# Step 2: let load_or_generate decide what to do for one specific alias
print("Step 2: Using load-or-generate pattern...\n")
# NOTE(review): this alias is spelled with a dot ('v1.5'), while other cells
# in this notebook look up 'bge_base_en_v1_5'. The registry lookup is an
# exact match on model_alias, so confirm which spelling foundation/02 stores.
EMBEDDING_MODEL_ALIAS = 'bge_base_en_v1.5'  # Match the alias from foundation/02

# preserve_existing=True: load without prompting whenever embeddings exist
loaded_db = load_or_generate(conn, EMBEDDING_MODEL_ALIAS, preserve_existing=True)

if not loaded_db:
    print(f"\n⚠️  Could not load embeddings. See instructions above.")
else:
    print(f"\n✓ Success! You can now use the embeddings.")
    print(f"Total embeddings available: {loaded_db.get_chunk_count():,}\n")

## Part 5: Query Registry Metadata

**Advanced:** Access detailed metadata about registered embeddings. The code cell for this part comes after the Part 4b explanation below.

## Part 4b: Understanding the Load-or-Generate Pattern

The three utility functions work together to implement a smart, user-friendly pattern:

### Function 1: `list_available_embeddings(db_connection)`
Shows all embedding models in the registry. Run this to see what you have available.

**Returns:** DataFrame with all registered models and their metadata
**Use case:** "What embedding models do I have available for reuse?"

### Function 2: `get_embedding_metadata(db_connection, model_alias)`
Gets detailed information about one specific embedding model.

**Returns:** Dict with dimension, count, chunk config, created date, custom metadata
**Use case:** "Is this embedding model compatible? How was it generated?"

### Function 3: `load_or_generate(db_connection, embedding_model_alias, preserve_existing=None)`
The main pattern - checks registry and intelligently decides whether to load or generate.

**Key features:**
- Checks if embeddings exist in registry
- Shows time savings message if found
- Prompts user interactively (or uses preserve_existing flag)
- Handles errors gracefully with troubleshooting tips
- Returns PostgreSQLVectorDB ready for retrieval

**Arguments:**
- `db_connection`: Your PostgreSQL connection
- `embedding_model_alias`: Model to look for (e.g., 'bge_base_en_v1.5')
- `preserve_existing`: None=prompt, True=always load, False=regenerate

**Returns:** PostgreSQLVectorDB instance, or None if not found

### Decision Flow

```
load_or_generate() called
    ↓
Check registry for embeddings
    ↓
    ├─ Embeddings found:
    │   ├─ preserve_existing=None? → Ask user
    │   ├─ preserve_existing=True?  → Load automatically
    │   └─ preserve_existing=False? → Tell user how to regenerate
    │
    └─ No embeddings found:
        └─ Show error with instructions
```

### Example: Using in Your Code

```python
# In any intermediate or advanced notebook:

# Step 1: See what embeddings you have
available = list_available_embeddings(conn)
print(available)

# Step 2: Check if specific model is available
metadata = get_embedding_metadata(conn, 'bge_base_en_v1.5')
if metadata:
    print(f"Found {metadata['embedding_count']} embeddings")
    print(f"Dimension: {metadata['dimension']}")

# Step 3: Load with user prompt (interactive)
db = load_or_generate(conn, 'bge_base_en_v1.5', preserve_existing=None)

# Step 4: Or load automatically (non-interactive, for scripts)
db = load_or_generate(conn, 'bge_base_en_v1.5', preserve_existing=True)

# Step 5: Use for retrieval
if db:
    results = db.similarity_search(query_embedding, top_n=3)
```

### Why Inline Functions Matter

These functions are defined inline in this notebook (not in a separate module) because:

1. **Pedagogical clarity** - You see the full implementation as you learn
2. **Copy-paste pattern** - You can easily copy them into your own notebooks
3. **Low friction** - No import statements or dependency setup needed
4. **Educational value** - Understanding the code matters more than packaging efficiency

In a production system, these would be in a shared utility module. But for learning, inline is better!

In [None]:
def get_embedding_metadata(db_connection, model_alias):
    """Return the embedding_registry row for *model_alias* as a dict.

    The dict carries, in this order: dimension, embedding_count,
    chunk_source_dataset, chunk_size_config, created_at, last_accessed and
    metadata_json. Values are passed through exactly as the cursor returned
    them. Returns None when no row matches.
    """
    field_names = (
        'dimension',
        'embedding_count',
        'chunk_source_dataset',
        'chunk_size_config',
        'created_at',
        'last_accessed',
        'metadata_json',
    )
    metadata_sql = '''
            SELECT 
                dimension,
                embedding_count,
                chunk_source_dataset,
                chunk_size_config,
                created_at,
                last_accessed,
                metadata_json
            FROM embedding_registry
            WHERE model_alias = %s
        '''
    with db_connection.cursor() as cursor:
        cursor.execute(metadata_sql, (model_alias,))
        row = cursor.fetchone()

    if not row:
        return None
    # field_names is listed in the same order as the SELECT columns.
    return {name: row[position] for position, name in enumerate(field_names)}

# Look up the registry entry for one alias and print each field on its own line
metadata = get_embedding_metadata(conn, 'bge_base_en_v1_5')

if not metadata:
    print("No metadata found for 'bge_base_en_v1_5'")
else:
    print("=== Embedding Metadata ===")
    for field, field_value in metadata.items():
        print(f"{field}: {field_value}")

## Key Takeaways

### What You Learned:

1. **Registry Discovery**: Query `embedding_registry` to find cached embeddings
2. **Instant Loading**: Load pre-generated embeddings in <1 second (vs. 50+ min to regenerate)
3. **Load-or-Generate Pattern**: Check registry first, decide whether to load or regenerate
4. **Metadata Access**: Use registry to understand embedding properties (dimension, source, timestamp)
5. **Fast Experimentation**: Once embeddings are registered, you can experiment with techniques rapidly

### Why This Matters:

- **Without registry**: Each advanced technique experiment takes 50 minutes (embedding gen) + 5 minutes (technique) = 55 min
- **With registry**: Each experiment takes <1 second (load) + 5 minutes (technique) = 5 min
- **Result**: 11× faster iteration = more experiments, better learning!

### Next Steps:

1. **intermediate/04**: Learn how to compare multiple embedding models
2. **advanced/05-10**: Use loaded embeddings to experiment with techniques
3. **evaluation-lab**: Compare experiments and measure improvements

---

## Cleanup

In [None]:
def close_notebook_connections(namespace, names=('vector_db', 'loaded_db', 'conn')):
    """Close every object stored under *names* in *namespace*.

    Names that are missing or bound to None are skipped, so this is safe to
    run even when an earlier cell failed before creating its connection.

    Args:
        namespace: Mapping of variable names to objects (e.g. globals()).
        names: Variable names to look up; each found object must offer
               a close() method.

    Returns:
        List of the names whose objects were closed, in the order processed.
    """
    closed = []
    for name in names:
        resource = namespace.get(name)
        if resource is None:
            continue
        resource.close()
        closed.append(name)
    return closed


# Close database connections: the two vector-DB helpers (each owns its own
# connection) and the shared registry connection.
close_notebook_connections(globals())

print("✓ Connections closed")