## Setup

In [None]:
import ollama
import psycopg2
import pandas as pd
import time

# Connection settings for the PostgreSQL instance used throughout this notebook.
POSTGRES_CONFIG = dict(
    host='localhost',
    port=5432,
    database='rag_db',
    user='postgres',
    password='postgres',
)

print("Configuration loaded")

## Part 1: List Available Models

In [None]:
# Open the notebook-wide connection; the Cleanup cell at the end closes it.
conn = psycopg2.connect(
    **{
        key: POSTGRES_CONFIG[key]
        for key in ('host', 'port', 'database', 'user', 'password')
    }
)

# One row per entry in embedding_registry, most recently created first.
query = '''
    SELECT model_alias, model_name, dimension, embedding_count, created_at
    FROM embedding_registry
    ORDER BY created_at DESC
'''
available = pd.read_sql(query, conn)

# Report the registry contents without the DataFrame index column.
print("=== Available Embedding Models ===")
print(available.to_string(index=False))
print(f"\nTotal: {len(available)} model(s)")

## Part 2: Compare Retrieval Quality

This section demonstrates how to compare retrieval results across different models.

In [None]:
class PostgreSQLVectorDB:
    """Query helper for a single PostgreSQL table of vector embeddings.

    The table is expected to expose a ``chunk_text`` column and an
    ``embedding`` column that supports the ``<=>`` distance operator
    (pgvector cosine distance).

    Instances own a database connection. Call :meth:`close` when done, or use
    the instance as a context manager::

        with PostgreSQLVectorDB(config, 'some_table') as db:
            db.get_chunk_count()
    """

    def __init__(self, config, table_name):
        """Connect to the database described by ``config``.

        Args:
            config: Mapping with the keys ``host``, ``port``, ``database``,
                ``user`` and ``password``.
            table_name: Table to query. The name is interpolated directly
                into the SQL text, so it must come from trusted code and
                never from user input.
        """
        self.config = config
        self.table_name = table_name
        self.conn = psycopg2.connect(
            host=config['host'],
            port=config['port'],
            database=config['database'],
            user=config['user'],
            password=config['password']
        )

    def __enter__(self):
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection on leaving the ``with`` block."""
        self.close()
        return False  # never suppress the caller's exception

    def get_chunk_count(self):
        """Return the number of rows in the table."""
        # `with self.conn` ends the transaction psycopg2 opens implicitly:
        # commit on success, rollback on error. Without it, read-only calls
        # leave the session idle-in-transaction, and one failed query makes
        # every later query on this connection fail.
        with self.conn, self.conn.cursor() as cur:
            cur.execute(f'SELECT COUNT(*) FROM {self.table_name}')
            return cur.fetchone()[0]

    def similarity_search(self, query_embedding, top_n=5):
        """Return the ``top_n`` rows closest to ``query_embedding``.

        Args:
            query_embedding: Query vector; it is cast to ``vector`` in SQL.
            top_n: Maximum number of rows to return.

        Returns:
            A list of ``(chunk_text, similarity)`` tuples ordered from most
            to least similar, where similarity is ``1 - (embedding <=> query)``.
        """
        # The embedding is bound twice: once for the reported score and once
        # for the ORDER BY that picks the nearest rows.
        with self.conn, self.conn.cursor() as cur:
            cur.execute(f'''
                SELECT chunk_text, 1 - (embedding <=> %s::vector) as similarity
                FROM {self.table_name}
                ORDER BY embedding <=> %s::vector
                LIMIT %s
            ''', (query_embedding, query_embedding, top_n))
            return cur.fetchall()

    def close(self):
        """Close the underlying connection if one is set."""
        if self.conn:
            self.conn.close()

print("Vector DB class loaded")

## Key Concepts

### Embedding Model Comparison

To effectively compare embedding models:

1. **Generate embeddings with each model** using foundation/02
2. **Store in separate tables** (embedding_bge_base, embedding_bge_small, etc.)
3. **Query with the same test set** to compare retrieval quality
4. **Measure metrics**: Precision@K, Recall, MRR, response time

### Common Models

- **BGE-Base (768D)**: Balanced quality/speed (current default)
- **BGE-Small (384D)**: Faster, lower quality
- **UAE-Large (1024D)**: Higher quality, similar speed

### How to Add a Second Model

1. Install: `ollama pull hf.co/CompendiumLabs/bge-small-en-v1.5-gguf`
2. Edit foundation/02: Change `EMBEDDING_MODEL = 'hf.co/CompendiumLabs/bge-small-en-v1.5-gguf'`
3. Run foundation/02 again (creates a new `embedding_bge_small` table)
4. Come back here and compare!

---

## Cleanup

In [None]:
# Close the shared connection opened in Part 1.
conn.close()
# The check mark was mojibake (UTF-8 bytes decoded as cp1252); restored to U+2713.
print("✓ Connections closed")