## Import Dependencies

In [None]:
import json
import os
from datetime import datetime

import ollama
import pandas as pd
import psycopg2

## Configuration

In [None]:
# PostgreSQL connection settings.
# Every value can be overridden through an environment variable so real
# credentials never need to be written into the notebook; the fallbacks are
# the local tutorial defaults.
POSTGRES_CONFIG = {
    'host': os.environ.get('POSTGRES_HOST', 'localhost'),
    'port': int(os.environ.get('POSTGRES_PORT', '5432')),
    'database': os.environ.get('POSTGRES_DB', 'rag_db'),
    'user': os.environ.get('POSTGRES_USER', 'postgres'),
    'password': os.environ.get('POSTGRES_PASSWORD', 'postgres'),
}

# Ollama model identifiers. EMBEDDING_MODEL is the model handed to
# ollama.embed when a query is embedded for retrieval.
EMBEDDING_MODEL = 'hf.co/CompendiumLabs/bge-base-en-v1.5-gguf'
LANGUAGE_MODEL = 'hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF'

## Part 1: Discover Available Embeddings

**What's in the registry?** Query the `embedding_registry` table to see which models and embeddings are available.

In [None]:
def list_available_embeddings(db_connection):
    """Return every row of ``embedding_registry`` as a DataFrame, newest first.

    The query runs through a DB-API cursor instead of ``pd.read_sql``: pandas
    only supports SQLAlchemy connectables (or sqlite3) and emits a
    ``UserWarning`` when it is handed a raw psycopg2 connection.

    Args:
        db_connection: Open DB-API connection (e.g. from ``psycopg2.connect``)
            whose cursors can be used as context managers.

    Returns:
        DataFrame with columns: model_alias, model_name, dimension,
        embedding_count, chunk_source_dataset, chunk_size_config, created_at,
        last_accessed. Empty (but with those columns) when the registry
        holds no rows.

    Raises:
        Whatever the database driver raises when the query fails. The
        transaction is rolled back first so the connection stays usable.
    """
    query = '''
        SELECT 
            model_alias,
            model_name,
            dimension,
            embedding_count,
            chunk_source_dataset,
            chunk_size_config,
            created_at,
            last_accessed
        FROM embedding_registry
        ORDER BY created_at DESC
    '''
    try:
        with db_connection.cursor() as cur:
            cur.execute(query)
            # Column names come from the cursor so the frame always mirrors
            # the SELECT list, even when no rows are returned.
            columns = [col[0] for col in cur.description]
            rows = cur.fetchall()
    except Exception:
        # A failed statement leaves the connection in an aborted transaction;
        # roll back so later cells can keep using the same connection.
        db_connection.rollback()
        raise

    # coerce_float matches what pd.read_sql does by default.
    return pd.DataFrame.from_records(rows, columns=columns, coerce_float=True)

# Connect and discover embeddings.
# `conn` is bound before the attempt so that later cells (including Cleanup)
# can test it even when the connection fails, instead of raising NameError.
conn = None
try:
    conn = psycopg2.connect(
        host=POSTGRES_CONFIG['host'],
        port=POSTGRES_CONFIG['port'],
        database=POSTGRES_CONFIG['database'],
        user=POSTGRES_CONFIG['user'],
        password=POSTGRES_CONFIG['password']
    )
    print('✓ Connected to PostgreSQL')
    
    # Query registry
    available = list_available_embeddings(conn)
    
    if available.empty:
        print("\n⚠️  No embeddings found in registry!")
        print("\nTo populate the registry, run: foundation/02-rag-postgresql-persistent.ipynb")
    else:
        print("\n=== Available Embeddings ===")
        print(available.to_string(index=False))
        
except psycopg2.OperationalError as e:
    # Raised by psycopg2.connect when the server cannot be reached.
    print(f'✗ Failed to connect: {e}')
    print("Make sure PostgreSQL is running")

## Part 2: Load Embeddings from Registry

**Now that you know what embeddings exist, load them!** This is instant (no regeneration needed).

In [None]:
class PostgreSQLVectorDB:
    """Helper to load embeddings from PostgreSQL without regeneration.

    Opens its own psycopg2 connection on construction and runs read queries
    against a single embeddings table. Instances can be used as context
    managers so the connection is closed on exit.

    NOTE: ``table_name`` is interpolated directly into the SQL text, so it
    must come from trusted code, never from user input.
    """
    
    def __init__(self, config, table_name):
        """Connect to PostgreSQL.

        Args:
            config: Mapping with 'host', 'port', 'database', 'user' and
                'password' keys.
            table_name: Name of the table holding the embeddings.
        """
        self.config = config
        self.table_name = table_name
        self.conn = psycopg2.connect(
            host=config['host'],
            port=config['port'],
            database=config['database'],
            user=config['user'],
            password=config['password']
        )
        print(f'✓ Connected to table: {table_name}')
    
    def __enter__(self):
        return self
    
    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never swallow the caller's exception
    
    def _run_query(self, query, params=None, fetch_one=False):
        """Execute a read query and return its row(s), rolling back on failure."""
        try:
            with self.conn.cursor() as cur:
                cur.execute(query, params)
                return cur.fetchone() if fetch_one else cur.fetchall()
        except Exception:
            # psycopg2 leaves the connection in an aborted transaction after a
            # failed statement (e.g. a missing table); without a rollback every
            # later query on this connection would fail as well.
            if not getattr(self.conn, 'closed', 0):
                self.conn.rollback()
            raise
    
    def get_chunk_count(self):
        """How many embeddings are stored?"""
        row = self._run_query(f'SELECT COUNT(*) FROM {self.table_name}', fetch_one=True)
        return row[0]
    
    def similarity_search(self, query_embedding, top_n=3):
        """Retrieve most similar chunks using pgvector.

        Args:
            query_embedding: Embedding of the query, cast to ``vector`` in SQL.
            top_n: Maximum number of rows to return.

        Returns:
            List of ``(chunk_text, similarity)`` tuples ordered by ascending
            ``<=>`` distance, where similarity is ``1 - distance``.
        """
        rows = self._run_query(f'''
                SELECT chunk_text, 
                       1 - (embedding <=> %s::vector) as similarity
                FROM {self.table_name}
                ORDER BY embedding <=> %s::vector
                LIMIT %s
            ''', (query_embedding, query_embedding, top_n))
        return [(chunk, score) for chunk, score in rows]
    
    def close(self):
        """Close the underlying connection if one is held."""
        if self.conn:
            self.conn.close()

# Load embeddings from registry (instant - no regeneration)
EMBEDDING_MODEL_ALIAS = 'bge_base_en_v1_5'  # Match what foundation/02 registered
TABLE_NAME = f'embeddings_{EMBEDDING_MODEL_ALIAS.replace(".", "_")}'

# Bind the name before the attempt so later cells can test `vector_db`
# instead of raising NameError when loading fails.
vector_db = None
try:
    vector_db = PostgreSQLVectorDB(POSTGRES_CONFIG, TABLE_NAME)
    count = vector_db.get_chunk_count()
    
    print(f'\n✓ Loaded {count:,} embeddings from PostgreSQL (instant! no regeneration needed)')
    print(f'  Table: {TABLE_NAME}')
    print(f'  Time: <1 second (vs. 50+ minutes to regenerate)')
    
except Exception as e:
    # The connection may have opened before the count query failed (e.g. the
    # table does not exist); release it rather than keep a handle that
    # cannot serve queries.
    if vector_db is not None:
        vector_db.close()
        vector_db = None
    print(f'✗ Could not load embeddings: {e}')
    print(f'\nMake sure foundation/02-rag-postgresql-persistent.ipynb has been run')

## Part 3: Retrieve Using Loaded Embeddings

**Now you can perform RAG operations instantly using the pre-generated embeddings!**

In [None]:
def retrieve(query, top_n=3):
    """Embed ``query`` with Ollama and look up its nearest stored chunks.

    Only the query itself is embedded here; the comparison runs against the
    embeddings already held by the module-level ``vector_db``.

    Args:
        query: Text to search for.
        top_n: Number of results requested from the similarity search.

    Returns:
        Whatever ``vector_db.similarity_search`` returns for the query vector.
    """
    embed_response = ollama.embed(model=EMBEDDING_MODEL, input=query)
    # ollama.embed answers with a list of embeddings; a single input yields
    # one entry, which is the query vector.
    query_vector = embed_response['embeddings'][0]

    matches = vector_db.similarity_search(query_vector, top_n=top_n)
    return matches

# Smoke-test retrieval against the stored embeddings
test_query = "What is photosynthesis?"
print(f"Query: '{test_query}'\n")

results = retrieve(test_query, top_n=3)
print("Retrieved chunks:")
for rank, (chunk_text, similarity) in enumerate(results, start=1):
    # First line of the chunk doubles as its heading in the printout.
    heading = chunk_text.split('\n', 1)[0]
    # One-line preview: first 200 characters with newlines flattened.
    snippet = f"{chunk_text[:200].replace(chr(10), ' ')}..."
    print("\n".join([
        f"\n  [{rank}] Similarity: {similarity:.4f}",
        f"      {heading}",
        f"      {snippet}",
    ]))

## Part 4: The Load-or-Generate Pattern

**Best Practice:** Check registry first, then decide to load or generate.

In [None]:
def load_or_generate(db_connection, embedding_model_alias, preserve_existing=True, config=None):
    """Check the registry and load existing embeddings when they are present.

    This is the KEY PATTERN for the advanced learning system:
    - If embeddings exist and ``preserve_existing`` is True → load them (instant)
    - If embeddings don't exist → tell the user to run foundation/02 first
    - If ``preserve_existing`` is False → regeneration is NOT IMPLEMENTED;
      a notice is printed and nothing is loaded
    
    Args:
        db_connection: PostgreSQL connection used to query ``embedding_registry``
        embedding_model_alias: Model to look for (e.g., 'bge_base_en_v1_5')
        preserve_existing: If True, load the stored embeddings; if False, only
            report that regeneration would be required
        config: Connection settings for the returned PostgreSQLVectorDB.
            Defaults to the notebook-wide ``POSTGRES_CONFIG`` so this function
            always targets the same server as the rest of the notebook.
    
    Returns:
        PostgreSQLVectorDB instance ready for use, or None when no registry
        entry exists or when ``preserve_existing`` is False.
    """
    # Step 1: Check if embeddings exist in registry
    with db_connection.cursor() as cur:
        cur.execute('''
            SELECT id, dimension, embedding_count, metadata_json
            FROM embedding_registry
            WHERE model_alias = %s
        ''', (embedding_model_alias,))
        registry_entry = cur.fetchone()
    
    if registry_entry is None:
        print(f"✗ No embeddings found for '{embedding_model_alias}'")
        print(f"\n  To create embeddings, run:")
        print(f"  foundation/02-rag-postgresql-persistent.ipynb")
        return None
    
    # Only the dimension and count are reported; id and metadata are unused here.
    _reg_id, dimension, embedding_count, _metadata = registry_entry
    
    print(f"✓ Found existing embeddings for '{embedding_model_alias}'")
    print(f"  Embeddings: {embedding_count:,}")
    print(f"  Dimension: {dimension}")
    
    if not preserve_existing:
        print(f"  Would regenerate (preserve_existing=False)")
        print(f"  NOT IMPLEMENTED - would need dataset + generation code")
        return None
    
    print(f"  Loading from PostgreSQL (no regeneration needed)")
    
    # Step 2: Return ready-to-use database. The config is resolved only here
    # so the lookup-only paths above never need connection settings.
    table_name = f'embeddings_{embedding_model_alias.replace(".", "_")}'
    connection_config = POSTGRES_CONFIG if config is None else config
    return PostgreSQLVectorDB(connection_config, table_name)

# Demonstrate the load-or-generate pattern on the registered tutorial model
print("=== Load-or-Generate Pattern ===")
loaded_db = load_or_generate(
    conn,
    'bge_base_en_v1_5',
    preserve_existing=True,
)

if loaded_db:
    # Count the rows through the freshly returned helper to confirm it works.
    loaded_chunk_total = loaded_db.get_chunk_count()
    print(f"\n✓ Ready to use! {loaded_chunk_total:,} embeddings loaded")

## Part 5: Query Registry Metadata

**Advanced:** Access detailed metadata about registered embeddings.

In [None]:
def get_embedding_metadata(db_connection, model_alias):
    """Look up the registry row for one embedding model.

    Args:
        db_connection: Connection used to query ``embedding_registry``.
        model_alias: Value matched against the ``model_alias`` column.

    Returns:
        Dict keyed by dimension, embedding_count, chunk_source_dataset,
        chunk_size_config, created_at, last_accessed and metadata_json
        (in that order), or None when no row matches.
    """
    # Same order as the SELECT list below, so position i maps to field i.
    field_names = (
        'dimension',
        'embedding_count',
        'chunk_source_dataset',
        'chunk_size_config',
        'created_at',
        'last_accessed',
        'metadata_json',
    )

    with db_connection.cursor() as cur:
        cur.execute('''
            SELECT 
                dimension,
                embedding_count,
                chunk_source_dataset,
                chunk_size_config,
                created_at,
                last_accessed,
                metadata_json
            FROM embedding_registry
            WHERE model_alias = %s
        ''', (model_alias,))
        
        row = cur.fetchone()

    if not row:
        return None
    return {name: row[position] for position, name in enumerate(field_names)}

# Look up and print the registry metadata for the tutorial model
metadata = get_embedding_metadata(conn, 'bge_base_en_v1_5')

if not metadata:
    print("No metadata found for 'bge_base_en_v1_5'")
else:
    print("=== Embedding Metadata ===")
    # One "name: value" line per registry field, in the dict's own order.
    for field_name in metadata:
        print(f"{field_name}: {metadata[field_name]}")

## Key Takeaways

### What You Learned:

1. **Registry Discovery**: Query `embedding_registry` to find cached embeddings
2. **Instant Loading**: Load pre-generated embeddings in <1 second (vs. 50+ min to regenerate)
3. **Load-or-Generate Pattern**: Check registry first, decide whether to load or regenerate
4. **Metadata Access**: Use registry to understand embedding properties (dimension, source, timestamp)
5. **Fast Experimentation**: Once embeddings are registered, you can experiment with techniques rapidly

### Why This Matters:

- **Without registry**: Each advanced technique experiment takes 50 minutes (embedding gen) + 5 minutes (technique) = 55 min
- **With registry**: Each experiment takes <1 second (load) + 5 minutes (technique) = 5 min
- **Result**: ~11× faster iteration (55 min → 5 min) = more experiments, better learning!

### Next Steps:

1. **intermediate/04**: Learn how to compare multiple embedding models
2. **advanced/05-10**: Use loaded embeddings to experiment with techniques
3. **evaluation-lab**: Compare experiments and measure improvements

---

## Cleanup

In [None]:
# Close every database connection the notebook may have opened.
# Names are looked up through globals() because a cell that failed or was
# skipped never binds them, and `loaded_db` from Part 4 holds its own
# connection that would otherwise be left open.
for _handle_name in ('vector_db', 'loaded_db', 'conn'):
    _handle = globals().get(_handle_name)
    if _handle:
        _handle.close()

print("✓ Connections closed")