In [1]:
import setup

# Initialise Django through the project's local `setup` helper. This runs before
# the `rag` imports in the next cell; presumably those modules need a configured
# Django environment to import cleanly (TODO confirm against setup.py).
setup.init_django()

In [2]:
from rag import (
    db as rag_db, 
    engines as rag_engines,
    settings as rag_settings, 
    updaters as rag_updaters,
    patches as rag_patches,
)

In [3]:
from typing import Optional, Union
from sqlalchemy import create_engine, text

In [4]:
# One-time RAG bootstrap; keep the three calls in this order.
# NOTE(review): these helpers live in the project's `rag` package and are not
# visible from this notebook; the notes below are inferred from their names.
# Presumably configures the global LLM / embedding settings (confirm).
rag_settings.init()
# Presumably creates or connects to the vector database (confirm).
rag_db.init_vector_db()
# Syncs documents into the index; `use_saved_embeddings=True` presumably reuses
# stored embeddings instead of recomputing them (confirm).
rag_updaters.update_llama_index_documents(use_saved_embeddings=True)

In [5]:
# Build the retrieval objects used by the rest of the notebook.
# NOTE(review): `vector_index` is not referenced again in the visible cells; it
# is kept because the getter may have side effects (confirm before removing).
vector_index = rag_engines.get_semantic_query_index()
# Engine wrapped as the semantic (vector) tool further down.
semantic_query_retriever = rag_engines.get_semantic_query_retriever_engine()
# Engine wrapped as the natural-language-to-SQL tool further down.
sql_query_engine = rag_engines.get_sql_query_engine()

In [6]:
# Sanity check: show which vector database name and table name the settings hold.
print(rag_settings.VECTOR_DB_NAME, rag_settings.VECTOR_DB_TABLE_NAME)

vector_db blogpost


In [7]:
from llama_index.core.tools import QueryEngineTool

# Expose the semantic retriever as a tool. The description is the text the
# router reasons over when choosing which tool should answer a question.
# It is a plain string: the previous f-prefix had no placeholders to interpolate.
vector_tool = QueryEngineTool.from_defaults(
    query_engine=semantic_query_retriever,
    description=(
        "Useful for answering semantic questions about different blog posts"
    ),
)

In [8]:
# Expose the SQL engine as a tool. The description is the text the router
# reasons over when choosing a tool, so it should read as clear English
# (the original was missing "for": "page views each blog post").
sql_tool = QueryEngineTool.from_defaults(
    query_engine=sql_query_engine,
    description=(
        "Useful for translating a natural language query into a SQL query over"
        " a table containing: blog posts and page views for each blog post"
    ),
)

In [9]:
# Combine both tools into a single engine. Per the run logs in the cells below,
# each question is routed either to the SQL database or to the "other"
# (vector) query engine.
query_engine = rag_patches.MySQLAutoVectorQueryEngine(sql_tool, vector_tool)

In [10]:
# Open-ended question with no obvious SQL angle, to exercise the routing.
response = query_engine.query("What do you make?")

[1;3;34mQuerying other query engine: The question 'What do you make?' is semantic in nature and likely refers to the content or type of blog posts, making choice (2) more relevant.
[0m

In [11]:
# Show the raw answer text of the most recent query.
response.response

'I make decisions.'

In [12]:
# Two-part question: a ranking by page views plus the content keywords of each
# ranked post. (Fixed the typo "Are are" -> "What are" in the question text.)
response = query_engine.query(
    "What are the top 5 most viewed blog posts? What keywords do their content have?"
)

[1;3;34mQuerying SQL database: This choice is relevant because it mentions translating a query into a SQL query over a table containing blog posts and page views, which is necessary to determine the top 5 most viewed blog posts.
[1;3;33mSQL query: SELECT
    db.title,
    db.content,
    COUNT(ap.id) AS view_count
FROM
    data_blogpost db
JOIN
    analytics_pageview ap ON db.id = ap.post_id
GROUP BY
    db.id
ORDER BY
    view_count DESC
LIMIT 5;
[0m[1;3;33mSQL response: Based on the query results, the top 5 most viewed blog posts are:

1. **"Blog Post 1"** with 3,208 views.
   - Keywords: Harry, before you

2. **"Blog Post 2"** with 2,201 views.
   - Keywords: You, before Harry

3. **"Blog Post 3"** with 1,761 views.
   - Keywords: Harry, not here

4. **"What kind of org?"** with 1,235 views.
   - Keywords: organization, systems, charts, boxes, organism, changes

5. **"Taking it very seriously"** with 1,125 views.
   - Keywords: April first, greeting, New Year’s, date, internet, 

In [13]:
from IPython.display import Markdown, display

# Render the answer as Markdown so the numbered list and bold titles display properly.
display(Markdown(response.response))

Based on the query results, the top 5 most viewed blog posts are:

1. **"Blog Post 1"** with 3,208 views.
   - Keywords: Harry, before you

2. **"Blog Post 2"** with 2,201 views.
   - Keywords: You, before Harry

3. **"Blog Post 3"** with 1,761 views.
   - Keywords: Harry, not here

4. **"What kind of org?"** with 1,235 views.
   - Keywords: organization, systems, charts, boxes, organism, changes

5. **"Taking it very seriously"** with 1,125 views.
   - Keywords: April first, greeting, New Year’s, date, internet, apocalypse

These keywords are derived from the content of each blog post and represent the most prominent topics or phrases within each post.

In [14]:
# Date-bounded ranking question; the answer is printed here as plain text.
# NOTE(review): in the recorded run the generated SQL filters `ap.timestamp` in
# the WHERE clause after a LEFT JOIN. That discards posts with no page views in
# the range, so a "least viewed" list can never include zero-view posts.
# Verify the generated SQL before trusting this ranking.
response = query_engine.query(
    "What are the top 5 least viewed blog posts in the year 2024 to 2025?"
)
print(response.response)

[1;3;34mQuerying SQL database: The question requires translating a natural language query into a SQL query to determine the top 5 least viewed blog posts within a specific time frame, which aligns with the capability described in choice (1).
[1;3;33mSQL query: SELECT
    db.id,
    db.title,
    COUNT(ap.id) AS view_count
FROM
    data_blogpost db
LEFT JOIN
    analytics_pageview ap ON db.id = ap.post_id
WHERE
    ap.timestamp BETWEEN '2024-01-01' AND '2025-12-31'
GROUP BY
    db.id, db.title
ORDER BY
    view_count ASC
LIMIT 5;
[0m[1;3;33mSQL response: Based on the query results, the top 5 least viewed blog posts from the year 2024 to 2025 are as follows:

1. **"Forward"** with 161 views.
2. **"GenC"** with 206 views.
3. **"Toward better"** with 230 views.
4. **"Bulletins vs bulletin boards"** with 250 views.
5. **"Generous isn’t always the same as free"** with 261 views.

These blog posts had the lowest view counts within the specified time frame.
[1;3;34mTransformed query given

In [15]:
# Render the same answer as Markdown for readable list formatting.
display(Markdown(response.response))

Based on the query results, the top 5 least viewed blog posts from the year 2024 to 2025 are as follows:

1. **"Forward"** with 161 views.
2. **"GenC"** with 206 views.
3. **"Toward better"** with 230 views.
4. **"Bulletins vs bulletin boards"** with 250 views.
5. **"Generous isn’t always the same as free"** with 261 views.

These blog posts had the lowest view counts within the specified time frame.