In [1]:
from vanna.openai import OpenAI_Chat
from vanna.pgvector import PG_VectorStore

from langchain_openai import OpenAIEmbeddings

In [2]:
import os
from urllib.parse import quote_plus

from dotenv import load_dotenv

# Load variables from a local .env file (if present) so the os.environ
# lookups below can find the database and OpenAI settings.
load_dotenv()

class DalgoVannaClient(PG_VectorStore, OpenAI_Chat):
    """Vanna client backed by a pgvector store and an OpenAI chat model.

    Connection details for the vector store and the OpenAI API key are read
    from environment variables; callers can extend or override either
    configuration through the constructor arguments.
    """

    def __init__(self, openai_config=None, pg_vector_config=None):
        """Initialise both parent classes with environment-derived config.

        Args:
            openai_config: Optional dict merged over the defaults
                (``api_key`` from ``OPENAI_API_KEY`` and ``model`` set to
                ``"gpt-4o-mini"``). Keys supplied by the caller win.
            pg_vector_config: Optional dict merged over the default
                ``connection_string``. Keys supplied by the caller win.

        Raises:
            KeyError: If a required environment variable is not set.
        """
        # Percent-encode the credentials: characters such as "@", ":" or "/"
        # in a username/password would otherwise be parsed as URL delimiters.
        connection_string = (
            "postgresql+psycopg://{username}:{password}@{server}:{port}/{database}".format(
                username=quote_plus(os.environ["PGVECTOR_USER"]),
                password=quote_plus(os.environ["PGVECTOR_PASSWORD"]),
                server=os.environ["PGVECTOR_HOST"],
                port=os.environ["PGVECTOR_PORT"],
                database=os.environ["PGVECTOR_DB"],
            )
        )
        PG_VectorStore.__init__(
            self,
            config={
                "connection_string": connection_string,
                # None (the default) means "no overrides".
                **(pg_vector_config or {}),
            },
        )
        OpenAI_Chat.__init__(
            self,
            config={
                "api_key": os.environ["OPENAI_API_KEY"],
                "model": "gpt-4o-mini",
                **(openai_config or {}),
            },
        )


In [3]:
# Build the client: the initial prompt asks the model to schema-qualify table
# names, and OpenAI embeddings are passed through to the pgvector config.
vn_client = DalgoVannaClient(
    openai_config=dict(
        initial_prompt="Please qualify all table names with their schema names in the generated SQL"
    ),
    pg_vector_config=dict(embedding_function=OpenAIEmbeddings()),
)

In [4]:
# Connect to the warehouse; each keyword argument is read from the
# environment variable WAREHOUSE_<ARGUMENT NAME IN UPPER CASE>.
vn_client.connect_to_postgres(
    **{
        argument: os.environ[f"WAREHOUSE_{argument.upper()}"]
        for argument in ("host", "dbname", "user", "password", "port")
    }
)

In [None]:
# Training plan: fetch column metadata for every schema that is not excluded.
exclude_schemas = ["airbyte_internal", "dbt_staging_elementary", "pg_catalog"]
# NOTE(review): "information_schema" is not in the list above, so its own
# columns are returned as well — confirm whether that is intended.


def quote_schema(schema):
    """Return ``schema`` as a single-quoted SQL string literal.

    Embedded single quotes are doubled so a schema name cannot terminate the
    literal early.
    """
    escaped = schema.replace("'", "''")
    return f"'{escaped}'"


excluded_schemas_sql = ",".join(quote_schema(schema) for schema in exclude_schemas)

# Adjacent literals are concatenated into one single-line query string.
training_sql_query = (
    "SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE "
    f"table_schema not in ({excluded_schemas_sql}) "
)

train_df = vn_client.run_sql(training_sql_query)

In [7]:
# Display the generated SQL to check how the excluded-schema list was rendered.
training_sql_query

"SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE table_schema not in ('airbyte_internal','dbt_staging_elementary','pg_catalog') "

In [8]:
# Count the metadata rows per schema in the frame used for the training plan.
train_df.loc[:, "table_schema"].value_counts()

table_schema
information_schema    649
mongo_staging         291
test_ledger           133
compliance            117
dbt_staging            52
Name: count, dtype: int64

In [9]:
# Ask the client to derive a training plan from the column-metadata frame.
training_plan = vn_client.get_training_plan_generic(df=train_df)

In [10]:
# Train the client on the plan built in the previous cell.
# NOTE(review): confirm whether re-running this cell stores duplicate entries.
vn_client.train(plan=training_plan)

### Janaagraha

In [None]:
# Chart metadata that is appended to the question as extra context.
chart = [
    {
        "chart_name": "City Eligibility",
        "schema": "mongo_staging",
        "table": "grants_available",
    }
]

user_question = "Give me the % eligibility across panchayats, municipalities and area councils"

# The assignment below replaces the question above; only this one is sent.
# NOTE(review): "panachayats" looks like a typo for "panchayats"; it is left
# unchanged because it is part of the prompt text.
user_question = "Give me the % of panachayats eligible for grants"

# Only the first chart entry is used to build the prompt.
chart_context = chart[0]

vanna_query = f"""
User's Question: {user_question}

Additional Context:
- Chart Title: {chart_context['chart_name']}
- Schema: {chart_context['schema']}
- Table: {chart_context['table']}


Please provide a detailed answer considering the above context.
"""

sql = vn_client.generate_sql(question=vanna_query, allow_llm_to_see_data=False)

SQL Prompt: [{'role': 'system', 'content': "Please qualify all table names with their schema names in the generated SQL\n===Additional Context \n\nThe following columns are in the eligibility_state_percent table in the cityfinance database:\n\n|     | table_catalog   | table_schema   | table_name                | column_name            | data_type         |\n|----:|:----------------|:---------------|:--------------------------|:-----------------------|:------------------|\n| 337 | cityfinance     | mongo_staging  | eligibility_state_percent | eligibility_percentage | numeric           |\n| 338 | cityfinance     | mongo_staging  | eligibility_state_percent | total_cities           | bigint            |\n| 339 | cityfinance     | mongo_staging  | eligibility_state_percent | eligible_cities        | bigint            |\n| 565 | cityfinance     | mongo_staging  | eligibility_state_percent | state                  | character varying |\n| 566 | cityfinance     | mongo_staging  | eligibility