In [None]:
from vanna.openai import OpenAI_Chat
from vanna.pgvector import PG_VectorStore

from langchain_openai import OpenAIEmbeddings

## SQL Generation

In [None]:
import os
from dotenv import load_dotenv

# Load settings from a local .env file (python-dotenv) so the os.environ
# lookups further down can find the database and OpenAI credentials.
load_dotenv()

class DalgoVannaClient(PG_VectorStore, OpenAI_Chat):
    """Vanna client combining a pgvector store with an OpenAI chat model.

    Connection details for the vector store and the OpenAI API key are read
    from environment variables when the client is constructed.
    """

    def __init__(self, openai_config=None, pg_vector_config=None):
        """Initialise both parent classes with environment-derived defaults.

        Args:
            openai_config: Optional mapping merged over the default OpenAI
                settings (``api_key`` and ``model``); its keys take precedence.
            pg_vector_config: Optional mapping merged over the default
                pgvector settings (``connection_string``); its keys take
                precedence.

        Raises:
            KeyError: If one of the ``PGVECTOR_*`` variables or
                ``OPENAI_API_KEY`` is missing from the environment.
        """
        # Imported locally so this cell does not depend on another cell's imports.
        from urllib.parse import quote_plus

        # Username and password are percent-encoded: characters such as "@",
        # ":" or "/" would otherwise be parsed as URL delimiters and yield a
        # broken connection string. Store the raw (unencoded) values in the
        # environment.
        connection_string = "postgresql+psycopg://{username}:{password}@{server}:{port}/{database}".format(
            username=quote_plus(os.environ["PGVECTOR_USER"]),
            password=quote_plus(os.environ["PGVECTOR_PASSWORD"]),
            server=os.environ["PGVECTOR_HOST"],
            port=os.environ["PGVECTOR_PORT"],
            database=os.environ["PGVECTOR_DB"],
        )

        PG_VectorStore.__init__(
            self,
            config={
                "connection_string": connection_string,
                # Caller-supplied keys come last so they override the defaults.
                **(pg_vector_config or {}),
            },
        )
        OpenAI_Chat.__init__(
            self,
            config={
                "api_key": os.environ["OPENAI_API_KEY"],
                "model": "gpt-4o-mini",
                # Caller-supplied keys come last so they override the defaults.
                **(openai_config or {}),
            },
        )


In [None]:
# Build the client used by every later cell. The extra OpenAI setting asks for
# schema-qualified table names; the pgvector setting supplies the embedding
# function.
vn_client = DalgoVannaClient(
    openai_config=dict(
        initial_prompt="Please qualify all table names with their schema names in the generated SQL",
    ),
    pg_vector_config=dict(embedding_function=OpenAIEmbeddings()),
)

In [None]:
# Attach the warehouse database connection used by the run_sql call below.
vn_client.connect_to_postgres(
    host=os.environ["WAREHOUSE_HOST"],
    dbname=os.environ["WAREHOUSE_DBNAME"],
    user=os.environ["WAREHOUSE_USER"],
    password=os.environ["WAREHOUSE_PASSWORD"],
    # Environment values are strings; convert the port to an integer up front
    # so a malformed WAREHOUSE_PORT fails here with a clear ValueError.
    port=int(os.environ["WAREHOUSE_PORT"]),
)

In [None]:
# Pull column metadata for every schema except Airbyte's internal one, then
# turn it into a training plan.
# NOTE(review): the filter presumably still lets database system schemas
# through — confirm whether those should be excluded as well.
train_df = vn_client.run_sql(
    "SELECT * FROM INFORMATION_SCHEMA.COLUMNS "
    "WHERE table_schema != 'airbyte_internal' "
)
training_plan = vn_client.get_training_plan_generic(df=train_df)

In [None]:
# Train the client on the plan derived from the warehouse column metadata.
# NOTE(review): re-running this cell presumably trains on the same plan again —
# confirm whether that creates duplicate entries.
vn_client.train(plan=training_plan)

In [None]:
# Ask the client for SQL answering a natural-language question; as the last
# expression in the cell, the return value is shown as the cell output.
vn_client.generate_sql(question="Get me all the unique sources in the dbt project")