In [1]:
import setup

# Runs ahead of the `analytics.models` / `data.models` imports further down.
# NOTE(review): presumably this configures Django (settings + app registry) so
# the ORM is usable from a notebook — confirm against the `setup` module.
setup.init_django()

In [2]:
import os

from decouple import config, AutoConfig

# Directory handed to python-decouple as the starting point for locating the
# project's env/settings file. It can be overridden with the
# CHATBOT_PROJECT_ROOT environment variable so the notebook is not tied to one
# developer's machine; the fallback preserves the original hard-coded path.
PROJECT_ROOT = os.environ.get("CHATBOT_PROJECT_ROOT", "/home/harry/chatbotDjango")

# Rebinds `config` (imported just above) to an AutoConfig rooted at
# PROJECT_ROOT, replacing decouple's default instance for the rest of the
# notebook.
config = AutoConfig(search_path=PROJECT_ROOT)

In [3]:
from analytics.models import PageView
from data.models import BlogPost
from rag import db as rag_db, settings as rag_settings

In [4]:
from sqlalchemy import (
    create_engine,
    inspect,
)

from llama_index.core import SQLDatabase
from llama_index.core.query_engine import NLSQLTableQueryEngine
from llama_index.core.retrievers import NLSQLRetriever

In [5]:
# Apply the project's default LlamaIndex settings first.
rag_settings.init()

# Fetch the pooled Neon connection string (sourced from .env or environment variables).
vector_database_url = rag_db.get_database_url(
    use_pooling=True,
)

In [6]:
# Build the SQLAlchemy engine from the connection string resolved in the previous cell.
engine = create_engine(vector_database_url)

In [7]:
# List the table names visible through this engine; left as the cell's last
# expression so the notebook displays the resulting list.
inspect(engine).get_table_names()

['django_migrations',
 'django_content_type',
 'auth_permission',
 'auth_group',
 'auth_group_permissions',
 'auth_user_groups',
 'auth_user_user_permissions',
 'django_admin_log',
 'auth_user',
 'django_session',
 'products_embedding',
 'products_product',
 'data_blogpost',
 'analytics_pageview']

In [8]:
# Django models whose backing tables the natural-language SQL tooling may query.
# Only models actually imported earlier in this notebook are listed, so the
# cell runs on a fresh kernel and matches the recorded output below.
models = [BlogPost, PageView]

# `_meta.db_table` holds the database table name for each Django model.
tables = [model._meta.db_table for model in models]

In [9]:
# Display the collected table names (these are passed to SQLDatabase in the next cell).
tables

['data_blogpost', 'analytics_pageview']

In [10]:
# Wrap the engine in LlamaIndex's SQLDatabase, passing only the table names
# collected above via `include_tables`.
sql_database = SQLDatabase(engine, include_tables=tables)

  self._metadata.reflect(


In [11]:
# Natural-language query engine over the wrapped database, given the same `tables` list.
sql_query_engine = NLSQLTableQueryEngine(sql_database=sql_database, tables=tables)

In [12]:
# Ask a plain-English question; per the recorded output, the response carries
# the synthesized answer plus the generated SQL and raw rows in its metadata.
response = sql_query_engine.query("What is my most recent blog post id?")
# Bare expression so the notebook renders the Response repr.
response

  for column in self._inspector.get_columns(table_name, schema=self._schema):


Response(response='Based on the query results, your most recent blog post ID is 29, which was posted on March 13, 2025, at 09:07:28 UTC.', source_nodes=[NodeWithScore(node=TextNode(id_='de0f7f18-34d6-4e01-a2c2-94d7f47edce5', embedding=None, metadata={'sql_query': 'SELECT id, timestamp\nFROM data_blogpost\nORDER BY timestamp DESC\nLIMIT 1;', 'result': [(29, datetime.datetime(2025, 3, 13, 9, 7, 28, 964667, tzinfo=datetime.timezone.utc))], 'col_keys': ['id', 'timestamp']}, excluded_embed_metadata_keys=['sql_query', 'result', 'col_keys'], excluded_llm_metadata_keys=['sql_query', 'result', 'col_keys'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='[(29, datetime.datetime(2025, 3, 13, 9, 7, 28, 964667, tzinfo=datetime.timezone.utc))]', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=None)], metadata={'de0f7f18-34d6-4e01-a2c2-94d7f47edce5': {'sql_query': 'SELECT i

In [13]:
# Print the text content of every source node attached to the response.
for node in response.source_nodes:
    print(node.node.get_content())

[(29, datetime.datetime(2025, 3, 13, 9, 7, 28, 964667, tzinfo=datetime.timezone.utc))]


In [14]:
# Retriever counterpart of the query engine above, constructed with
# return_raw=True over the same database wrapper and table list.
nl_sql_retriever = NLSQLRetriever(
    sql_database, tables=tables, return_raw=True
)

# NOTE(review): "least most recent" is awkward phrasing; the recorded output
# shows it was translated to ORDER BY timestamp ASC (i.e. the oldest post) —
# confirm that is the intended question.
r = nl_sql_retriever.retrieve("What is my least most recent blog post?")

In [15]:
# Dump the retrieved list as a whole, then each node followed by its metadata
# (which, per the recorded output, includes the generated SQL and result rows).
print(r)
for node in r:
    print(node)
    print(node.metadata)

[NodeWithScore(node=TextNode(id_='52b993a7-3e61-40d4-a071-ecd9172fc0af', embedding=None, metadata={'sql_query': 'SELECT id, title, timestamp\nFROM data_blogpost\nORDER BY timestamp ASC\nLIMIT 1;', 'result': [(25, 'Blog Post 1', datetime.datetime(2025, 3, 12, 11, 11, 17, 633185, tzinfo=datetime.timezone.utc))], 'col_keys': ['id', 'title', 'timestamp']}, excluded_embed_metadata_keys=['sql_query', 'result', 'col_keys'], excluded_llm_metadata_keys=['sql_query', 'result', 'col_keys'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text="[(25, 'Blog Post 1', datetime.datetime(2025, 3, 12, 11, 11, 17, 633185, tzinfo=datetime.timezone.utc))]", mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=None)]
Node ID: 52b993a7-3e61-40d4-a071-ecd9172fc0af
Text: [(25, 'Blog Post 1', datetime.datetime(2025, 3, 12, 11, 11, 17,
633185, tzinfo=datetime.timezone.utc))]
Score: None

{'sql_qu