In [1]:
import setup

# Bootstrap Django through the project-local `setup` helper before the `rag`
# package is imported in the next cell (presumably `rag` needs configured
# Django settings — TODO confirm).
setup.init_django()

In [2]:
from rag import (
    db as rag_db, 
    engines as rag_engines,
    settings as rag_settings, 
    updaters as rag_updaters,
)

In [3]:
from typing import Optional, Union
from sqlalchemy import create_engine, text

In [4]:
# RAG bootstrap, executed in this order: settings, vector DB, documents.
# NOTE(review): the order looks load-bearing — confirm before rearranging.
rag_settings.init()
rag_db.init_vector_db()
# use_saved_embeddings=True presumably reuses stored embeddings rather than
# recomputing them — TODO confirm against rag.updaters.
rag_updaters.update_llama_index_documents(use_saved_embeddings=True)

In [5]:
# Fetch the handles used by the cells below: the semantic retriever engine is
# wrapped by `vector_tool`, the SQL query engine by `sql_tool`.
# NOTE(review): `vector_index` is not referenced again in the visible cells.
vector_index = rag_engines.get_semantic_query_index()
semantic_query_retriever = rag_engines.get_semantic_query_retriever_engine()
sql_query_engine = rag_engines.get_sql_query_engine()

In [6]:
# Sanity check: show which vector database name and table name the RAG
# settings currently hold.
print(rag_settings.VECTOR_DB_NAME, rag_settings.VECTOR_DB_TABLE_NAME)

vector_db blogpost


In [7]:
from llama_index.core.tools import QueryEngineTool

# Wrap the semantic retriever engine as a tool the combined query engine can
# route to. The description is the text used to decide when this tool applies,
# so it is a plain string literal (no f-string: there is nothing to interpolate).
vector_tool = QueryEngineTool.from_defaults(
    query_engine=semantic_query_retriever,
    description=(
        "Useful for answering semantic questions about different blog posts"
    ),
)

In [8]:
# Wrap the SQL query engine as a tool the combined query engine can route to.
# The description is read when choosing between tools, so it should be a
# grammatical sentence describing what the SQL side can answer.
sql_tool = QueryEngineTool.from_defaults(
    query_engine=sql_query_engine,
    description=(
        "Useful for translating a natural language query into a SQL query over"
        " a table containing: blog posts and page views for each blog post"
    ),
)

In [18]:
from typing import Any, Optional, Union

# Import necessary classes from llama_index
from llama_index.core.llms import LLM
from llama_index.core.prompts import BasePromptTemplate
from llama_index.core.callbacks import CallbackManager
from llama_index.core.selectors import LLMSingleSelector, PydanticSingleSelector
from llama_index.core.query_engine import SQLAutoVectorQueryEngine, RetrieverQueryEngine
from llama_index.core.query_engine.sql_vector_query_engine import *

class MySQLAutoVectorQueryEngine(SQLAutoVectorQueryEngine):
    """SQL + vector query engine that skips ``SQLAutoVectorQueryEngine.__init__``.

    The constructor accepts the "vector"-named arguments listed below and hands
    them directly to ``SQLJoinQueryEngine.__init__``, supplying
    ``vector_query_tool`` as that initializer's ``other_query_tool``.

    NOTE(review): presumably this exists so that any query-engine tool can be
    used on the vector side without going through the parent's own constructor
    — confirm against the installed llama_index version.
    """

    def __init__(
        self,
        sql_query_tool: QueryEngineTool,
        vector_query_tool: QueryEngineTool,
        selector: Optional[Union[LLMSingleSelector, PydanticSingleSelector]] = None,
        llm: Optional[LLM] = None,
        sql_vector_synthesis_prompt: Optional[BasePromptTemplate] = None,
        sql_augment_query_transform: Optional[SQLAugmentQueryTransform] = None,
        use_sql_vector_synthesis: bool = True,
        callback_manager: Optional[CallbackManager] = None,
        verbose: bool = True,
    ) -> None:
        """Map the vector-flavoured arguments onto the join-engine initializer.

        Args:
            sql_query_tool: Forwarded as ``sql_query_tool``.
            vector_query_tool: Forwarded as ``other_query_tool``.
            selector: Forwarded unchanged.
            llm: Forwarded unchanged.
            sql_vector_synthesis_prompt: Forwarded as
                ``sql_join_synthesis_prompt``; a falsy value is replaced by
                ``DEFAULT_SQL_VECTOR_SYNTHESIS_PROMPT``.
            sql_augment_query_transform: Forwarded unchanged.
            use_sql_vector_synthesis: Forwarded as ``use_sql_join_synthesis``.
            callback_manager: Forwarded unchanged.
            verbose: Forwarded unchanged; defaults to ``True`` in this class.
        """
        # Fall back to the stock synthesis prompt when the caller gave none.
        if not sql_vector_synthesis_prompt:
            sql_vector_synthesis_prompt = DEFAULT_SQL_VECTOR_SYNTHESIS_PROMPT

        # Translate this class's argument names into the names that
        # SQLJoinQueryEngine.__init__ expects.
        join_engine_kwargs = {
            "sql_query_tool": sql_query_tool,
            "other_query_tool": vector_query_tool,  # the key difference
            "selector": selector,
            "llm": llm,
            "sql_join_synthesis_prompt": sql_vector_synthesis_prompt,
            "sql_augment_query_transform": sql_augment_query_transform,
            "use_sql_join_synthesis": use_sql_vector_synthesis,
            "callback_manager": callback_manager,
            "verbose": verbose,
        }

        # Call the join engine's initializer explicitly rather than super(),
        # so SQLAutoVectorQueryEngine.__init__ is never executed.
        SQLJoinQueryEngine.__init__(self, **join_engine_kwargs)

In [19]:
# Combined engine over both tools; every other constructor argument keeps its default.
query_engine = MySQLAutoVectorQueryEngine(
    sql_query_tool=sql_tool,
    vector_query_tool=vector_tool,
)

In [21]:
# Ask a question about the content of the blog posts.
response = query_engine.query("What kind of org is discussed?")

[1;3;34mQuerying other query engine: The question 'What kind of org is discussed?' is semantic in nature and likely refers to the content of different blog posts, making choice (2) more relevant.
[0m

In [22]:
# Display the answer text of the most recent query as the cell output.
response.response

'The kind of org discussed is one that is organized, with systems, charts, and boxes, and requires approval for changes. It is contrasted with a more adaptable and resilient organism that constantly changes and adapts to its environment.'

In [24]:
# Two-part question: a ranking by page views plus a content question about
# the ranked posts.
response = query_engine.query(
    "What are the top 5 most viewed blog posts? What keywords do their content have?"
)

[1;3;34mQuerying SQL database: This choice is most relevant because it mentions translating a query into a SQL query over a table containing blog posts and page views, which is necessary to determine the top 5 most viewed blog posts.
[1;3;33mSQL query: SELECT
    db.id,
    db.title,
    db.content,
    COUNT(ap.id) AS view_count
FROM
    data_blogpost db
JOIN
    analytics_pageview ap ON db.id = ap.post_id
GROUP BY
    db.id
ORDER BY
    view_count DESC
LIMIT 5;
[0m[1;3;33mSQL response: Based on the query results, the top 5 most viewed blog posts are:

1. **"Blog Post 1"** with 3,208 views.
   - Keywords: Harry, Here, before, you

2. **"Blog Post 2"** with 2,201 views.
   - Keywords: You, Here, before, Harry

3. **"Blog Post 3"** with 1,761 views.
   - Keywords: Harry, Was, not, Here

4. **"What kind of org?"** with 1,235 views.
   - Keywords: organization, systems, charts, boxes, organism, changes

5. **"Taking it very seriously"** with 1,125 views.
   - Keywords: April first, gr

In [25]:
from IPython.display import Markdown, display

# Render the answer as Markdown so its numbered lists and bold titles are
# formatted instead of shown as raw text.
display(Markdown(response.response))

Based on the provided information, here's a synthesized response to the original question:

The top 5 most viewed blog posts are:

1. **"Blog Post 1"** with 3,208 views.
   - Keywords: Harry, Here, before, you

2. **"Blog Post 2"** with 2,201 views.
   - Keywords: You, Here, before, Harry

3. **"Blog Post 3"** with 1,761 views.
   - Keywords: Harry, Was, not, Here

4. **"What kind of org?"** with 1,235 views.
   - Keywords: organization, systems, charts, boxes, organism, changes

5. **"Taking it very seriously"** with 1,125 views.
   - Keywords: April first, greeting, New Year’s, date, Happy, internet, smile, apocalypse

Additionally, considering the vector store response, here are some more blog posts and their keywords:

1. **"Communicating online (the big leaps)"**
   - Keywords: Attention, spam, permission, asynchronous, synchronous, conversation, anonymous.

2. **"Acknowledgments 2020"**
   - Keywords: Gratitude, healthcare workers, frontline workers, Zoom, entrepreneurs, teachers, community.

3. **"More information is a competitive advantage, but it’s not enough"**
   - Keywords: Domain knowledge, judgment, competitive advantage, learning.

4. **"And now, what’s next?"**
   - Keywords: Contribution, learning, possibility, connection, leadership, opportunity.

5. **"But what could you learn instead?"**
   - Keywords: Education, learning, creativity, art, culture, change, mastery.

These keywords are derived directly from the content of each blog post and provide insight into the topics covered in each of the top viewed posts.

In [26]:
# Date-scoped page-view question; print the answer text as plain output.
response = query_engine.query("What are the top 5 least viewed blog posts from today?")
print(response.response)

[1;3;34mQuerying SQL database: This choice is relevant because it mentions translating a natural language query into a SQL query over a table containing blog posts and page views, which is necessary to determine the top 5 least viewed blog posts from today.
[1;3;33mSQL query: SELECT
    db.id,
    db.title,
    COUNT(ap.id) AS view_count
FROM
    data_blogpost db
LEFT JOIN
    analytics_pageview ap ON db.id = ap.post_id
WHERE
    ap.timestamp >= CURRENT_DATE
GROUP BY
    db.id, db.title
ORDER BY
    view_count ASC
LIMIT 5;
[0m[1;3;33mSQL response: Based on the SQL response, there are no blog posts viewed today. Therefore, I cannot provide a list of the top 5 least viewed blog posts. It appears that either there have been no views on any blog posts today, or there are no blog posts available for viewing.
[1;3;34mTransformed query given SQL response: Are there any blog posts available for viewing today?
[1;3;38;5;200mquery engine response: Yes, there are several blog posts availabl

In [27]:
# Render the same answer as Markdown so the bullet list is formatted.
display(Markdown(response.response))

Based on the information provided, here's a synthesized response to the original question:

"Currently, there are no viewed blog posts today, so I cannot list the top 5 least viewed blog posts. However, I can confirm that there are several blog posts available for viewing today. Some of the topics covered include:

- The advantages and disadvantages of using bulletins versus bulletin boards for communicating updates.
- Acknowledgments of various individuals and groups for their efforts and support.
- The key differences between communicating online and in the real world.
- The importance of good judgment and thoughtful points of view in addition to domain knowledge.
- The spread of panic and calm, and how to cultivate calm in various environments.

Since there are blog posts available, it seems that the lack of data in the SQL response is due to no views on any blog posts today."