In [1]:
import sys
import os
from pathlib import Path

# The parent of the current working directory is treated as the project root.
# NOTE(review): this assumes the notebook is launched from a direct
# sub-directory of the repository -- confirm for other launch locations.
project_root = Path.cwd().parent

# Put the project root on the import path so the `src.*` imports used by the
# following cells resolve. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate entry each time.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('hey-database-tutorial')


# Secrets are read from the environment instead of being hard-coded here.
api_key = os.getenv('OPENAI_API_KEY')
db_pwd = os.getenv('POSTGRES_PWD')

# os.getenv returns None for unset variables; report that right away rather
# than letting it surface later as an unrelated-looking failure. This only
# warns (does not raise) because the cells below may obtain the values another
# way.
for _env_name, _env_value in (('OPENAI_API_KEY', api_key), ('POSTGRES_PWD', db_pwd)):
    if not _env_value:
        logger.warning("Environment variable %s is not set", _env_name)

In [2]:
from src.config.config_loader import ConfigLoader

# Every configuration file is located under <project_root>/configs.
configs_dir = os.path.join(project_root, "configs")

# Keyword argument of ConfigLoader.load_config -> path of the YAML file it gets.
# NOTE(review): "metadata_.yaml" has a trailing underscore in its name --
# confirm this matches the file on disk and is intentional.
config_paths = {
    "db_config_path": os.path.join(
        configs_dir, "DB_connections", "northwind_postgres.yaml"
    ),
    "cache_config_path": os.path.join(configs_dir, "cache", "northwind_cache.yaml"),
    "sql_llm_config_path": os.path.join(configs_dir, "sql_llm", "openai_4o_mini.yaml"),
    "vector_store_config_path": os.path.join(
        configs_dir, "vector_store", "qdrant_northwind.yaml"
    ),
    "prompt_config_path": os.path.join(configs_dir, "prompt.yaml"),
    "metadata_config_path": os.path.join(configs_dir, "metadata_.yaml"),
    "base_config_path": os.path.join(configs_dir, "base_config.yaml"),
}

config = ConfigLoader.load_config(**config_paths)

In [3]:
from src.build.build_app_components import AppComponentsBuilder

# Create the builder from the loaded configuration, then build the components
# object whose attributes the following cells read.
components_builder = AppComponentsBuilder(config)
app_components = components_builder.build()

  functions.register_function("flatten", flatten)
  from tqdm.autonotebook import tqdm, trange
INFO:hey-database:Connected to PostgreSQL database
INFO:hey-database:Vector store enabled, initializing client...


In [4]:
from src.metadata.metadata_startup import MetadataManager, MetadataProcessor

# The processor is handed the table/column extractors and enhancers that are
# exposed as attributes of the built application components.
processor_parts = {
    "table_extractor": app_components.table_metadata_extractor,
    "column_extractor": app_components.column_metadata_extractor,
    "table_enhancer": app_components.table_metadata_enhancer,
    "column_enhancer": app_components.column_metadata_enhancer,
}
metadata_processor = MetadataProcessor(**processor_parts)

# The manager receives the processor together with the components' cache.
metadata_cache = app_components.cache
metadata_manager = MetadataManager(metadata_processor, metadata_cache)

In [5]:
# Initialize the metadata and show the call's return value as the cell output.
# In the recorded run this logged "Found valid cached metadata." and gave True.
metadata_initialized = metadata_manager.initialize_metadata()
metadata_initialized

INFO:hey-database:Found valid cached metadata.


True

In [6]:
# Display the column metadata held by the manager. In the recorded output this
# is a nested dict: table name -> column name -> dict of column attributes.
columns_metadata = metadata_manager.metadata.columns
columns_metadata

{'territories': {'territory_id': {'name': 'territory_id',
   'table': 'territories',
   'data_type': 'VARCHAR(20)',
   'nullable': False,
   'is_primary_key': False,
   'is_foreign_key': False,
   'distinct_values': ['90405',
    '30346',
    '48084',
    '95054',
    '20852',
    '03049',
    '72716',
    '01730',
    '75234',
    '07960'],
   'ai_name': 'placeholder',
   'description': 'placeholder',
   'keywords': ['placeholder']},
  'territory_description': {'name': 'territory_description',
   'table': 'territories',
   'data_type': 'VARCHAR(60)',
   'nullable': False,
   'is_primary_key': False,
   'is_foreign_key': False,
   'distinct_values': ['Neward',
    'Santa Cruz',
    'Columbia',
    'New York',
    'Westboro',
    'Philadelphia',
    'Phoenix',
    'San Francisco',
    'Dallas',
    'Roseville'],
   'ai_name': 'placeholder',
   'description': 'placeholder',
   'keywords': ['placeholder']},
  'region_id': {'name': 'region_id',
   'table': 'territories',
   'data_type': 'S