In [1]:
import logging
import os
import sys
from pathlib import Path

# The project root is taken to be the parent of the current working directory
# (assumes the notebook is launched from a direct sub-folder of the repo —
# TODO confirm).
project_root = Path.cwd().parent

# Make the project's packages importable. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('hey-database-tutorial')

# Credentials are read from the environment; nothing is hard-coded here.
api_key = os.getenv('OPENAI_API_KEY')
db_pwd = os.getenv('POSTGRES_PWD')

# os.getenv silently returns None for unset variables, so surface that early
# instead of letting a later cell fail with a less obvious error.
_missing_env = [
    env_name
    for env_name, env_value in (('OPENAI_API_KEY', api_key), ('POSTGRES_PWD', db_pwd))
    if not env_value
]
if _missing_env:
    logger.warning("Environment variable(s) not set: %s", ", ".join(_missing_env))

# Directory that holds the RAG recipe configuration files.
recipes_dir = os.path.join(project_root, "configs", "rag_recipes")

In [2]:
from src.config.config_loader import ConfigLoader

# Loader keyword -> location of its YAML file, relative to <project_root>/configs.
_config_locations = {
    "db_config_path": ("DB_connections", "northwind_postgres.yaml"),
    "cache_config_path": ("cache", "northwind_cache.yaml"),
    "sql_llm_config_path": ("sql_llm", "openai_4o_mini.yaml"),
    "vector_store_config_path": ("vector_store", "qdrant_northwind.yaml"),
    "prompt_config_path": ("prompt.yaml",),
    # NOTE(review): the trailing underscore in "metadata_.yaml" is kept as-is —
    # confirm it matches the file name on disk.
    "metadata_config_path": ("metadata_.yaml",),
    "base_config_path": ("base_config.yaml",),
}

# Expand every entry to a full path and hand them all to the loader, together
# with the recipes directory defined in the setup cell.
config = ConfigLoader.load_config(
    **{
        keyword: os.path.join(project_root, "configs", *parts)
        for keyword, parts in _config_locations.items()
    },
    recipes_dir=recipes_dir,
)

In [3]:
# Display the recipe configurations held by the loaded config. In the recorded
# run this is a list of RecipeConfig objects (presumably parsed from
# `recipes_dir` — confirm against ConfigLoader).
config.recipes_configs

[RecipeConfig(name='basic_rag', description='Simple RAG recipe with cosine similarity retrieval', default=True, query_understanding=StrategyConfig(type='PassthroughQueryUnderstanding', params={}), retrieval=StrategyConfig(type='CosineSimRetrieval', params={'tables_limit': 3, 'columns_limit': 5, 'queries_limit': 2, 'use_exact_match': True}), context_processing=StrategyConfig(type='SimpleContextProcessor', params={'include_table_descriptions': True, 'include_column_descriptions': True, 'include_sample_queries': True, 'max_tables': 3, 'max_columns': 10, 'max_queries': 2}), prompt_building=StrategyConfig(type='StandardPromptBuilder', params={'include_original_query': True}), llm_interaction=StrategyConfig(type='DirectLLMInteraction', params={'system_prompt': "You are a SQL expert. Generate valid, executable SQL queries based on the user's question and database schema information.", 'temperature': 0.1, 'max_tokens': 2000}), response_processing=StrategyConfig(type='SQLResponseProcessor', par

In [4]:
from src.build.build_app_components import AppComponentsBuilder

# Create the builder from the loaded configuration, then assemble the
# application components used by the rest of the notebook.
components_builder = AppComponentsBuilder(config)
app_components = components_builder.build()

  functions.register_function("flatten", flatten)
  from tqdm.autonotebook import tqdm, trange
INFO:hey-database:Connected to PostgreSQL database
INFO:hey-database:Initializing vector store components...
INFO:hey-database:Vector store components initialized successfully
INFO:hey-database:Inizializzazione collezione di recipes RAG
INFO:hey-database:Impostata recipe di default: basic_rag
INFO:hey-database:Inizializzate 1 recipes


In [5]:
from src.metadata.metadata_startup import MetadataProcessor, MetadataStartup

# Extractor/enhancer pair for tables and for columns, all taken from the
# already-built application components.
processor_parts = {
    "table_extractor": app_components.table_metadata_extractor,
    "column_extractor": app_components.column_metadata_extractor,
    "table_enhancer": app_components.table_metadata_enhancer,
    "column_enhancer": app_components.column_metadata_enhancer,
}
metadata_processor = MetadataProcessor(**processor_parts)

# The startup manager receives the processor together with the application cache.
metadata_manager = MetadataStartup(metadata_processor, app_components.cache)

In [6]:
# Obtain the database metadata through the startup manager. In the recorded
# run the log reports that valid cached metadata was found.
metadata = metadata_manager.initialize_metadata()

INFO:hey-database:Found valid cached metadata.


In [7]:
# Display the whole metadata object. Its repr is long (a Metadata object whose
# `tables` mapping holds one TableMetadata per table), so the recorded output
# is truncated.
metadata

Metadata(tables={'territories': TableMetadata(name='territories', columns=['territory_id', 'territory_description', 'region_id'], primary_keys=['territory_id'], foreign_keys=[{'constrained_columns': ['region_id'], 'referred_table': 'region', 'referred_columns': ['region_id']}], row_count=53, description='placeholder', keywords=['placeholder'], importance_score=0.8, type='table'), 'order_details': TableMetadata(name='order_details', columns=['order_id', 'product_id', 'unit_price', 'quantity', 'discount'], primary_keys=['order_id', 'product_id'], foreign_keys=[{'constrained_columns': ['order_id'], 'referred_table': 'orders', 'referred_columns': ['order_id']}, {'constrained_columns': ['product_id'], 'referred_table': 'products', 'referred_columns': ['product_id']}], row_count=2155, description='placeholder', keywords=['placeholder'], importance_score=1.0, type='table'), 'employee_territories': TableMetadata(name='employee_territories', columns=['employee_id', 'territory_id'], primary_keys

In [8]:
from src.store.vectorstore_startup import VectorStoreStartup

# The startup helper is given the vector store and its writer component.
vector_store_startup = VectorStoreStartup(
    app_components.vector_store,
    app_components.vector_store_writer,
)

# Left as the cell's last expression so the return value is displayed; in the
# recorded run the collection was created, metadata was synced and the call
# returned True.
vector_store_startup.initialize(metadata)

INFO:hey-database:Initializing vector store collection
INFO:hey-database:Creating new collection: northwind
INFO:hey-database:Collection northwind created successfully
INFO:hey-database:Syncing metadata to vector store (collection is empty)
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 20

True

In [9]:
# Searcher component exposed by the assembled application.
searcher = app_components.vector_store_searcher

# Natural-language question used to look up matching tables.
table_question = "where i can find employees?"
tables = searcher.search_tables(table_question)

# Last expression of the cell: shows the list of search results.
tables

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:hey-database:Table search results count: 3


[TableSearchResult(id='c934f6fc-dda8-5e04-83de-dfbdecb3e563', similarity_score=0.4262426402108602, name='employee_territories', columns=['employee_id', 'territory_id'], primary_keys=['employee_id', 'territory_id'], foreign_keys=[{'constrained_columns': ['employee_id'], 'referred_table': 'employees', 'referred_columns': ['employee_id']}, {'constrained_columns': ['territory_id'], 'referred_table': 'territories', 'referred_columns': ['territory_id']}], row_count=49, description='placeholder', keywords=['placeholder'], importance_score=0.9),
 TableSearchResult(id='1307c9b0-9625-5b0b-bd8e-4b7694275d1d', similarity_score=0.4176551145121873, name='employees', columns=['employee_id', 'last_name', 'first_name', 'title', 'title_of_courtesy', 'birth_date', 'hire_date', 'address', 'city', 'region', 'postal_code', 'country', 'home_phone', 'extension', 'photo', 'notes', 'reports_to', 'photo_path'], primary_keys=['employee_id'], foreign_keys=[{'constrained_columns': ['reports_to'], 'referred_table': 

In [10]:
# Inspect the first search result on its own. In the recorded run it is the
# entry with the highest similarity_score shown (presumably results are
# ordered by score — confirm against the searcher).
tables[0]

TableSearchResult(id='c934f6fc-dda8-5e04-83de-dfbdecb3e563', similarity_score=0.4262426402108602, name='employee_territories', columns=['employee_id', 'territory_id'], primary_keys=['employee_id', 'territory_id'], foreign_keys=[{'constrained_columns': ['employee_id'], 'referred_table': 'employees', 'referred_columns': ['employee_id']}, {'constrained_columns': ['territory_id'], 'referred_table': 'territories', 'referred_columns': ['territory_id']}], row_count=49, description='placeholder', keywords=['placeholder'], importance_score=0.9)

In [11]:
from src.services.chat_service import ChatService

# Dependencies of the chat service, both taken from the built components.
chat_dependencies = {
    "recipes_collection": app_components.recipes_collection,
    "vector_store": app_components.vector_store,
}
chat_service = ChatService(**chat_dependencies)

In [12]:
# Send a natural-language request through the chat service using the
# 'basic_rag' recipe. In the recorded run the return value is a dict with
# 'success', 'query', 'explanation' and 'results' keys; the result rows are
# displayed in full.
chat_service.process_message(message="show me 5 employees", recipe_name="basic_rag")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:hey-database:Table search results count: 3
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:hey-database:Column search results count: 5
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:hey-database:Query search results count: 0
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


{'success': True,
 'query': 'SELECT * FROM northwind.employees LIMIT 5',
 'explanation': 'This query retrieves the first 5 records from the employees table in the northwind schema. The result will include various details about the employees, such as their employee_id, last_name, first_name, title, and other relevant information.',
 'results': [{'employee_id': 1,
   'last_name': 'Davolio',
   'first_name': 'Nancy',
   'title': 'Sales Representative',
   'title_of_courtesy': 'Ms.',
   'birth_date': datetime.date(1948, 12, 8),
   'hire_date': datetime.date(1992, 5, 1),
   'address': '507 - 20th Ave. E.\\nApt. 2A',
   'city': 'Seattle',
   'region': 'WA',
   'postal_code': '98122',
   'country': 'USA',
   'home_phone': '(206) 555-9857',
   'extension': '5467',
   'photo': <memory at 0x307936e00>,
   'notes': 'Education includes a BA in psychology from Colorado State University in 1970.  She also completed The Art of the Cold Call.  Nancy is a member of Toastmasters International.',
   'rep