## Amazon Book Review Insights Agent

In [0]:
# Install the notebook's dependencies. Only langgraph is pinned (0.3.4); the
# other packages are upgraded to whatever release is current (-U).
%pip install -U -qqqq mlflow-skinny[databricks] databricks-langchain databricks-agents uv langgraph==0.3.4
# Restart the Python process so the freshly installed packages are the ones
# picked up by the imports in the next cell.
dbutils.library.restartPython()

Note: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.


In [0]:
from langchain_community.chat_models import ChatDatabricks
from langchain.tools import Tool
from pyspark.sql import SparkSession
from langchain_core.prompts import PromptTemplate
from langchain.schema.runnable import RunnableLambda, RunnableParallel
from langchain_core.output_parsers import StrOutputParser, PydanticOutputParser
from langchain.prompts import MessagesPlaceholder
import mlflow
from typing import Optional
from langchain_core.messages import HumanMessage, trim_messages, AnyMessage, SystemMessage, ToolMessage, AIMessage
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.agents.agent_types import AgentType
from databricks.sdk import WorkspaceClient
from langchain_core.tools import tool
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate
import databricks.sql as sql
import base64
from databricks.sdk import WorkspaceClient
from databricks_langchain.genie import GenieAgent
from databricks.sdk import WorkspaceClient
from pydantic import BaseModel
from langgraph.graph import END, StateGraph
from langgraph.graph.state import CompiledStateGraph

from mlflow.langchain.chat_agent_langgraph import ChatAgentState
from mlflow.pyfunc import ChatAgent
from mlflow.types.agent import (
    ChatAgentChunk,
    ChatAgentMessage,
    ChatAgentResponse,
    ChatContext,
)

from typing import Any, Generator, Optional, Sequence, Union

In [0]:
# Rebind ChatDatabricks to the implementation shipped in databricks-langchain
# (installed in the first cell). The langchain_community import at the top of
# the notebook is the deprecated location for this class and emits a
# deprecation warning when instantiated.
from databricks_langchain import ChatDatabricks

# Name of the Databricks model-serving endpoint used by the Planner and
# Analyzer agents.
LLM_ENDPOINT_CLAUDE = "databricks-claude-3-7-sonnet"
llm = ChatDatabricks(endpoint=LLM_ENDPOINT_CLAUDE)

  llm = ChatDatabricks(endpoint=LLM_ENDPOINT_CLAUDE)


### Define Planner Agent

In [0]:
def planner_agent(state):
    """Turn the user's question into one task prompt per downstream agent.

    The conversation in ``state["messages"]`` is sent to the LLM behind a
    system prompt that describes the Genie and Analyzer agents, and the reply
    is parsed into a two-field object.

    Args:
        state: Graph state; only ``state["messages"]`` is read.

    Returns:
        A partial state update with the keys ``genie_prompt`` and
        ``analyzer_prompt``.

    Raises:
        The output parser's exception propagates when the LLM reply cannot be
        parsed into the expected ``genie_prompt`` / ``analyzer_prompt`` object.
    """
    system_prompt = """
    You are a task planner agent. You are responsible for providing the correct task prompt to the below agents based on the user question and the capabilities of each agent.

    Agent catalog: 
    Genie Agent: 
    - This is an enterprise-grade natural language data agent that can generate SQL queries with schema awareness for the data it has access to. This genie agent can answer questions about user reviews in the US region of Amazon products (category: Books) and explain the ratings given. 
    - Think of Genie as a data retrieval tool which can either provide aggregated responses or can retrieve the correct data subset which will be used by the Analyzer agent to perform further analysis and reasoning. 
    - Ensure that Genie returns all the relevant fields in its response (e.g., customer_id, product_title, review_headline, review_body, review_date, star_rating, helpful_votes, total_votes)
    - Genie has access to the "reviews_data" table which has the below schema.
        customer_id
        bigint
        Represents a unique identifier for each customer who submitted a review, allowing for tracking and analysis of individual customer behavior.

        review_id
        string
        A unique identifier for each review, which can be used to reference specific feedback provided by customers.

        product_id
        string
        Identifies the specific product being reviewed, enabling analysis of product performance and customer satisfaction on a per-product basis.

        product_title
        string
        The title of the book/ product as listed on Amazon, providing context for the review and helping to identify the book in question.

        star_rating
        bigint
        Indicates the rating given by the customer, on a scale typically from 1 to 5 stars, reflecting their overall satisfaction with the product/ book.

        helpful_votes
        bigint
        The number of users who found the review helpful, which can indicate the review's impact and relevance to other customers.

        total_votes
        bigint
        The total number of votes received for the review, including both helpful and unhelpful votes, providing insight into the review's reception.

        verified_purchase
        string
        Indicates whether the review was submitted by a customer who purchased the product/ book, adding credibility to the feedback.

        review_headline
        string
        A brief summary or title of the review, capturing the essence of the customer's feedback and drawing attention to key points.

        review_body
        string
        The detailed content of the review where customers express their thoughts, experiences, and opinions about the product/ book.

        review_date
        timestamp
        The date when the review was submitted, allowing for analysis of trends over time and the recency of customer feedback.


    Analyzer Agent: 
    - This is a reasoning agent which performs complex tasks like sentiment analysis, trend analysis of the positive or negative reviews across various products over time. 

    Ensure the task prompts generated for each agent are accurate. 

    Your output should be only the task prompts that each agent will use to perform its respective tasks. return in JSON format
        genie_prompt: <str>, 
        analyzer_prompt: <str>
    """

    class TaskPrompt(BaseModel):
        """Shape the LLM reply must parse into: one prompt per agent."""

        genie_prompt: str
        analyzer_prompt: str

    parser = PydanticOutputParser(pydantic_object=TaskPrompt)

    # Append the parser's own format instructions so the model is told the
    # exact JSON schema the parser validates against, rather than relying only
    # on the informal field list that closes the prompt above.
    full_system_prompt = f"{system_prompt}\n{parser.get_format_instructions()}"

    # Put the system prompt in front of the conversation before calling the LLM.
    preprocessor = RunnableLambda(
        lambda state: [{"role": "system", "content": full_system_prompt}]
        + state["messages"]
    )

    planner_chain = preprocessor | llm | parser
    response = planner_chain.invoke(state)

    print("\n\n[PLANNER]: Augmented prompts generated for both Genie and Analyzer")

    return {
        "genie_prompt": response.genie_prompt,
        "analyzer_prompt": response.analyzer_prompt,
    }

## Define Genie Agent

In [0]:
import os

# Connection settings for the Genie space. Each value is taken from the
# environment variable of the same name when it is set; otherwise replace the
# quoted placeholder with your own value.
GENIE_SPACE_ID = os.environ.get("GENIE_SPACE_ID", "<YOUR GENIE SPACE ID>")
DATABRICKS_HOST = os.environ.get("DATABRICKS_HOST", "<YOUR DATABRICKS HOST>")
# A personal access token is a credential: prefer supplying it through the
# environment (or a secret scope) over pasting it into the notebook.
GENIE_PAT = os.environ.get("GENIE_PAT", "<YOUR DATABRICKS PAT>")
# NOTE(review): DATABRICKS_HOST and GENIE_PAT are not passed to GenieAgent in
# this cell — confirm whether they are still needed.
genie_agent_description = "This genie agent can answer questions about user reviews of Amazon books and explain the ratings given. "

# Runnable that forwards a chat request to the configured Genie space.
genie_runnable = GenieAgent(
    genie_space_id=GENIE_SPACE_ID,
    genie_agent_name="Genie",
    description=genie_agent_description,
)

def genie_agent(state):
    """Send the planner's Genie prompt to the Genie runnable.

    Args:
        state: Graph state; reads ``genie_prompt`` and ``analyzer_prompt``.

    Returns:
        A partial state update with ``genie_response`` (the content of the
        first message in Genie's reply) and the ``analyzer_prompt`` passed
        through unchanged.
    """
    # Genie is given only the planner-written prompt as a single user message,
    # not the original conversation.
    genie_request = ChatAgentState(
        messages=[{"role": "user", "content": state["genie_prompt"]}]
    )
    genie_output = genie_runnable.invoke(genie_request)

    print("\n\n[GENIE]: Data subset retrieved")

    first_message = genie_output["messages"][0]
    return {
        "genie_response": first_message.content,
        "analyzer_prompt": state["analyzer_prompt"],
    }

Trace(trace_id=tr-5f809761d8ab090d128359b0110355e8)

### Define Analyzer Agent

In [0]:
def analyzer_agent(state):
    """Produce the final review analysis from the data Genie retrieved.

    Args:
        state: Graph state. Reads ``genie_response`` (Genie's output),
            ``messages`` (the conversation; its first message is used as the
            user's question) and, when present, ``analyzer_prompt`` (the
            planner's task prompt for this agent).

    Returns:
        A partial state update whose ``messages`` list holds a single
        assistant message named ``"Analyzer"`` with the LLM's analysis.
    """
    import uuid  # local import: only used to stamp the outgoing message

    genie_content = state["genie_response"]
    user_query = state["messages"][0]["content"]
    # The planner writes a task prompt specifically for this agent; include it
    # when available so that planning work actually reaches the model.
    analyzer_prompt = state.get("analyzer_prompt")

    system_prompt = """
    You are a sentiment/ reviews analyzer agent who is tasked with answering questions about the sentiment (positive, negative, neutral) for the customer reviews based out of US for Amazon products in the Books category. You need to answer the user questions accurately. Provide a comprehensive analysis of the customer feedbacks.
    """

    task_text = user_query
    if analyzer_prompt:
        task_text = f"{user_query}\n{analyzer_prompt}"

    # Final user turn: the task followed by the data to analyse. The newline
    # after the lead-in keeps the first row of Genie's output on its own line.
    table_message = {
        "role": "user",
        "content": task_text
        + "\nBelow is the table you need to analyze.\n"
        + genie_content,
    }

    preprocessor = RunnableLambda(
        lambda state: [{"role": "system", "content": system_prompt}]
        + state["messages"]
        + [table_message]
    )
    reply = (preprocessor | llm).invoke(state)

    print("\n\n[ANALYZER]: Final insights generated.")

    return {
        "messages": [
            {
                "role": "assistant",
                "content": reply.content,
                "name": "Analyzer",
                # MLflow documents the message id as required when the message
                # is part of a ChatAgentResponse or ChatAgentChunk.
                "id": str(uuid.uuid4()),
            }
        ]
    }

## Build the graph

In [0]:
class AgentState(ChatAgentState):
    """Graph state shared by the Planner, Genie and Analyzer nodes.

    Extends ``ChatAgentState`` with the intermediate values the nodes hand to
    one another.
    """
    # Task prompt for the Genie node; written by the Planner node.
    genie_prompt: str
    # Task prompt for the Analyzer node; written by the Planner node and
    # passed along by the Genie node.
    analyzer_prompt: str
    # Content of the first message in Genie's reply; read by the Analyzer node.
    genie_response: str

# Assemble the three agents into a linear graph:
#   Planner -> Genie -> Analyzer
workflow = StateGraph(AgentState)

# Nodes, listed in the order they execute.
workflow.add_node("Planner", planner_agent)
workflow.add_node("Genie", genie_agent)
workflow.add_node("Analyzer", analyzer_agent)

# Entry and exit of the pipeline.
workflow.set_entry_point("Planner")
workflow.set_finish_point("Analyzer")

# Hand-offs between consecutive agents.
workflow.add_edge("Planner", "Genie")
workflow.add_edge("Genie", "Analyzer")

# Compile into the runnable graph wrapped by the chat agent.
multi_agent = workflow.compile()


class LangGraphChatAgent(ChatAgent):
    """MLflow ``ChatAgent`` wrapper around a compiled LangGraph graph."""

    def __init__(self, agent: CompiledStateGraph):
        """Store the compiled graph that serves every request.

        Args:
            agent: Compiled LangGraph graph to run for each request.
        """
        self.agent = agent

    def _iter_node_messages(self, messages):
        """Run the graph and yield each message dict emitted by any node.

        Nodes whose update carries no ``"messages"`` key contribute nothing,
        so updates that only set other state fields are skipped instead of
        raising ``KeyError``.
        """
        request = {"messages": self._convert_messages_to_dict(messages)}
        for event in self.agent.stream(request, stream_mode="updates"):
            for node_data in event.values():
                yield from node_data.get("messages", [])

    def predict(
        self,
        messages: list[ChatAgentMessage],
        context: Optional[ChatContext] = None,
        custom_inputs: Optional[dict[str, Any]] = None,
    ) -> ChatAgentResponse:
        """Run the graph to completion and return every message it produced.

        Args:
            messages: Conversation to send to the graph.
            context: Accepted for interface compatibility; not used here.
            custom_inputs: Accepted for interface compatibility; not used here.

        Returns:
            A ``ChatAgentResponse`` holding the messages emitted by the graph's
            nodes, in the order they were streamed.
        """
        return ChatAgentResponse(
            messages=[
                ChatAgentMessage(**msg) for msg in self._iter_node_messages(messages)
            ]
        )

    def predict_stream(
        self,
        messages: list[ChatAgentMessage],
        context: Optional[ChatContext] = None,
        custom_inputs: Optional[dict[str, Any]] = None,
    ) -> Generator[ChatAgentChunk, None, None]:
        """Run the graph and yield one chunk per message as nodes finish.

        Args:
            messages: Conversation to send to the graph.
            context: Accepted for interface compatibility; not used here.
            custom_inputs: Accepted for interface compatibility; not used here.

        Yields:
            A ``ChatAgentChunk`` whose ``delta`` is a message emitted by a node.
        """
        for msg in self._iter_node_messages(messages):
            yield ChatAgentChunk(delta=msg)

# Turn on MLflow's LangChain autologging.
mlflow.langchain.autolog()
# Wrap the compiled multi-agent graph in the ChatAgent interface.
AGENT = LangGraphChatAgent(multi_agent)
# Register AGENT with MLflow as this notebook's model object.
mlflow.models.set_model(AGENT)

## Test the Agent

In [0]:
# Example request: run the full Planner -> Genie -> Analyzer pipeline on a
# single user question.
# NOTE(review): the whole request dict is passed as the first positional
# argument (`messages`); this relies on ChatAgent accepting a dict-shaped
# request — confirm against the installed MLflow version.
AGENT.predict({"messages": [
  {
    "role": "user",
    "content": "What are customers saying about Harry Potter books? Which book in the series is the highest rated?"
  }
]})



[PLANNER]: Augmented prompts generated for both Genie and Analyzer


[GENIE]: Data subset retrieved


[ANALYZER]: Final insights generated.


ChatAgentResponse(messages=[ChatAgentMessage(role='assistant', content='# Customer Sentiment Analysis: Harry Potter Books\n\nBased on the reviews provided, here\'s a comprehensive analysis of what customers are saying about Harry Potter books:\n\n## Overall Sentiment\n\nThe majority of reviews for Harry Potter books are overwhelmingly positive, with most books receiving 4-5 star ratings. Fans express deep appreciation for J.K. Rowling\'s storytelling abilities, character development, and the immersive magical world she created.\n\n## Book-Specific Feedback\n\n### Highest Rated Book in the Series\n\nBased on the review data provided, **Harry Potter and the Prisoner of Azkaban (Book 3)** appears to be the highest rated book in the series. Many reviewers specifically mention it as their favorite, with one calling it "flawless, superb" and another describing it as "the most artistic and creatively captivating in the series thus far."\n\nOther highly rated books include:\n- **Harry Potter a

Trace(trace_id=tr-1a0e7f69b38cd9f4da4120f80e668389)

In [0]:
# Second example request: a question that is not tied to one specific title.
AGENT.predict({"messages": [
  {
    "role": "user",
    "content": "Which books are most often described as life-changing in the reviews?"
  }
]})



[PLANNER]: Augmented prompts generated for both Genie and Analyzer


[GENIE]: Data subset retrieved


[ANALYZER]: Final insights generated.


ChatAgentResponse(messages=[ChatAgentMessage(role='assistant', content='# Books Most Often Described as Life-Changing in Reviews\n\nBased on the customer reviews provided, several books are explicitly described as "life-changing" by reviewers. Here\'s a comprehensive analysis of which books are most frequently mentioned as having transformative effects on readers:\n\n## Top Life-Changing Books\n\n1. **Boundaries: When to Say YES, When to Say NO, To Take Control of Your Life**\n   - Multiple reviewers directly state this book is "life-changing"\n   - One reviewer calls it "the most life-changing book I have ever read (second to the Bible)"\n   - Described as providing practical guidance for setting healthy boundaries\n\n2. **Created to be His Help Meet: Discover How God Can Make Your Marriage Glorious**\n   - Reviewers mention it "changed my life" and had "truly life-changing" effects\n   - Described as completely reconstructing thinking about marriage relationships\n\n3. **The Search F

Trace(trace_id=tr-60b85405389b007875d341d1bc6ce104)