In [1]:
# %%capture --no-stderr

# %pip install --quiet --upgrade langchain-text-splitters langchain-community langgraph
# %pip install -qU "langchain[openai]"
# %pip install -qU langchain-openai
# %pip install -qU langchain-core
# %pip install --upgrade --quiet langgraph langchain-community beautifulsoup4
# %pip install gradio
# %pip install langchain-chroma
# %pip install pypdf

In [2]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Turn LangSmith tracing on only when an API key is actually available.
# Assigning os.getenv(...) back into os.environ adds nothing when the key is
# set and raises TypeError when it is missing (environ values must be str),
# so the key is checked instead of being re-assigned.
if os.getenv("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_TRACING"] = "true"
else:
    os.environ["LANGSMITH_TRACING"] = "false"
    print("LANGSMITH_API_KEY is not set; LangSmith tracing is disabled.")

In [3]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Fail early with an actionable message when the key is missing. Copying
# os.getenv(...) back into os.environ cannot supply a missing value; it only
# raised an opaque TypeError (environ values must be str).
if not os.environ.get("AZURE_OPENAI_API_KEY"):
    raise EnvironmentError(
        "AZURE_OPENAI_API_KEY is not set. Add it to your .env file or export "
        "it in the environment before running this cell."
    )

from langchain_openai import AzureChatOpenAI

# Chat model used by the graph nodes; endpoint, deployment name and API
# version are all read from the environment (KeyError if one is missing).
llm = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_CHAT_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_CHAT_API_VERSION"],
)

In [4]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Fail early with an actionable message when the key is missing. Copying
# os.getenv(...) back into os.environ cannot supply a missing value; it only
# raised an opaque TypeError (environ values must be str).
if not os.environ.get("AZURE_OPENAI_API_KEY"):
    raise EnvironmentError(
        "AZURE_OPENAI_API_KEY is not set. Add it to your .env file or export "
        "it in the environment before running this cell."
    )

from langchain_openai import AzureOpenAIEmbeddings

# Embedding client handed to the vector store; endpoint, deployment name and
# API version are all read from the environment (KeyError if one is missing).
embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.environ["AZURE_OPENAI_EMBEDDINGS_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_EMBEDDINGS_API_VERSION"],
)

In [5]:
# from langchain_core.vectorstores import InMemoryVectorStore

# vector_store = InMemoryVectorStore(embeddings)
from langchain_community.vectorstores import Chroma

# Chroma store backed by the ./vector_store directory, using the `embeddings`
# client as its embedding function.
# NOTE(review): the source line echoed in this cell's output suggests a warning
# was raised by this call — possibly a deprecation of the community Chroma
# class in favour of the langchain-chroma package; confirm before migrating.
vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory="./vector_store",
)

  vector_store = Chroma(persist_directory="./vector_store", embedding_function=embeddings)


In [6]:
# LOAD CONTEXT

import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.messages import SystemMessage
from langgraph.prebuilt import ToolNode
from langgraph.graph import MessagesState, StateGraph
from langchain_core.tools import tool


# # Load and chunk contents of the blog
# loader = WebBaseLoader(
#     web_paths=("https://leginfo.legislature.ca.gov/faces/billTextClient.xhtml?bill_id=202320240SB976",),
#     bs_kwargs=dict(
#         parse_only=bs4.SoupStrainer(
#             class_=("tab_content")
#         )
#     ),
# )
# docs = loader.load()

# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# all_splits = text_splitter.split_documents(docs)

# # Index chunks
# _ = vector_store.add_documents(documents=all_splits)

# Builder for the conversation graph, using MessagesState as its state schema.
# Nodes and edges are registered on it before it is compiled.
graph_builder = StateGraph(MessagesState)


@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query."""
    # Ask the module-level vector store for up to five similarity-search hits.
    matches = vector_store.similarity_search(query, k=5)

    # Render every hit as a "Source / Content" block; blocks are separated by
    # a blank line.
    blocks = []
    for match in matches:
        blocks.append(f"Source: {match.metadata}\nContent: {match.page_content}")
    text = "\n\n".join(blocks)

    # Two-part result (text, raw documents), matching the
    # "content_and_artifact" response format declared on the decorator.
    return text, matches

# Step 1: Generate an AIMessage that may include a tool-call to be sent.
def query_or_respond(state: MessagesState):
    """Let the chat model either answer directly or request a retrieval.

    The model is bound to the `retrieve` tool and invoked on the messages
    currently in `state`. Its reply is returned as a one-element list under
    the "messages" key.
    """
    reply = llm.bind_tools([retrieve]).invoke(state["messages"])
    # Returned messages are appended to the MessagesState history rather than
    # replacing it.
    return {"messages": [reply]}


# Step 2: Execute the retrieval.
# ToolNode wrapping the `retrieve` tool so it can be added to the graph as a node.
# NOTE(review): the graph wiring refers to this node by the name "tools" —
# presumably ToolNode's default node name; confirm.
tools = ToolNode([retrieve])


# Step 3: Generate a response using the retrieved content.
def generate(state: MessagesState):
    """Answer the user from the output of the most recent retrieval.

    The trailing run of tool messages in ``state["messages"]`` is joined into
    a context block and appended to a fixed system prompt. That system
    message, followed by the human/system messages and the AI messages that
    carry no tool calls, is sent to ``llm``.

    Returns a dict holding the model's reply as a one-element list under
    ``"messages"``.
    """
    history = state["messages"]

    # Walk back from the end to find where the trailing tool messages begin;
    # slicing from there keeps them in their original order.
    first_tool_idx = len(history)
    while first_tool_idx > 0 and history[first_tool_idx - 1].type == "tool":
        first_tool_idx -= 1
    tool_messages = history[first_tool_idx:]

    # Build the system prompt: fixed instructions, blank line, retrieved text.
    docs_content = "\n\n".join(
        tool_message.content for tool_message in tool_messages
    )
    instructions = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
    )
    system_message_content = instructions + "\n\n" + docs_content

    # Keep only the conversational turns: human and system messages, plus AI
    # messages that are not tool-call requests.
    conversation_messages = []
    for message in history:
        if message.type in ("human", "system"):
            conversation_messages.append(message)
        elif message.type == "ai" and not message.tool_calls:
            conversation_messages.append(message)

    prompt = [SystemMessage(system_message_content), *conversation_messages]

    # Run
    answer = llm.invoke(prompt)
    return {"messages": [answer]}

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [7]:
# BUILD AND COMPILE GRAPH

from langgraph.graph import END
from langgraph.prebuilt import ToolNode, tools_condition
from langgraph.checkpoint.memory import MemorySaver

# Register the three steps as graph nodes. The edges below refer to them by
# the names "query_or_respond", "tools" and "generate".
graph_builder.add_node(query_or_respond)
graph_builder.add_node(tools)
graph_builder.add_node(generate)

# Every run starts at query_or_respond. From there tools_condition selects the
# branch; the mapping allows either finishing (END) or moving on to "tools".
graph_builder.set_entry_point("query_or_respond")
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {END: END, "tools": "tools"},
)
# After the tools node the flow is fixed: tools -> generate -> END.
graph_builder.add_edge("tools", "generate")
graph_builder.add_edge("generate", END)

# Compile the graph with a MemorySaver checkpointer.
# NOTE(review): presumably this keeps per-thread conversation state in process
# memory only (lost on kernel restart) — confirm.
memory = MemorySaver()
graph = graph_builder.compile(checkpointer=memory)
# Specify an ID for the thread
# This module-level config (fixed thread_id) is passed to every graph.stream
# call in the cells below, so they all run against the same thread.
config = {"configurable": {"thread_id": "abc123"}}


In [8]:
# TEST INPUT

input_message = "Does the following feature comply with the regulations stated? Feature: Universal PF deactivation on guest mode. Description: By default, PF will be turned off for all uses browsing in guest mode."

# Stream the graph in "values" mode and pretty-print the newest message of
# each emitted state, so the tool call, the tool output and the final answer
# are all visible.
events = graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    config=config,
    stream_mode="values",
)
for step in events:
    latest_message = step["messages"][-1]
    latest_message.pretty_print()
    


Does the following feature comply with the regulations stated? Feature: Universal PF deactivation on guest mode. Description: By default, PF will be turned off for all uses browsing in guest mode.
Tool Calls:
  retrieve (call_MuukJrKzknT76bIcrvBnf76F)
 Call ID: call_MuukJrKzknT76bIcrvBnf76F
  Args:
    query: Universal PF deactivation on guest mode regulations compliance
Name: retrieve

Source: {'total_pages': 102, 'author': 'Publications Office', 'creationdate': '2022-10-26T17:30:13+02:00', 'moddate': '2022-10-26T17:30:13+02:00', 'source': 'bills/CELEX_32022R2065_EN_TXT.pdf', 'creator': 'Arbortext Advanced Print Publisher 11.2.5235/W-x64', 'producer': 'PDFlib+PDI 9.1.2p4 (C++/Win64)', 'page_label': '32', 'epsprocessor': 'PStill version 1.84.42', 'page': 31, 'title': 'Publications Office'}
Content: disseminating child por nograph y , or to disable access to ser vices that are being used by a third par ty to infr ing e an 
intellectual proper ty r ight, are not reasonably av ailable.
E

In [None]:
import gradio as gr

def rag_chat(user_message, history):
    """Chat callback: run one user message through the graph and return the reply.

    Args:
        user_message: Text of the user's latest message.
        history: Prior chat turns supplied by the UI; not used here.

    Returns:
        The content of the last message of the last streamed state, or
        "No response." when no streamed state contained messages.
    """
    # NOTE(review): `config` is the module-level one with a fixed thread_id, so
    # every chat session (and the test cell) runs against the same thread —
    # confirm this sharing is intended.
    payload = {"messages": [{"role": "user", "content": user_message}]}

    received_any = False
    latest_content = None
    for step in graph.stream(payload, stream_mode="values", config=config):
        if "messages" not in step:
            continue
        newest = step["messages"][-1]
        latest_content = newest.content
        received_any = True

        # For debugging
        newest.pretty_print()

    if received_any:
        return latest_content
    return "No response."

# Build and launch the chat UI around rag_chat.
# No `theme` argument is passed: "compact" could not be resolved by Gradio (the
# failed lookup printed "Sorry, we can't find the page you are looking for."),
# so the default theme was what ended up being used anyway.
demo = gr.ChatInterface(rag_chat, title="Feature Compliance Checker")
demo.launch()



Sorry, we can't find the page you are looking for.
  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.





What is the data security act?
Tool Calls:
  retrieve (call_F4qeM4mUo1Anq0F76o7APHVw)
 Call ID: call_F4qeM4mUo1Anq0F76o7APHVw
  Args:
    query: data security act
Name: retrieve

Source: {'author': 'Publications Office', 'title': 'Publications Office', 'moddate': '2022-10-26T17:30:13+02:00', 'creationdate': '2022-10-26T17:30:13+02:00', 'page_label': '70', 'creator': 'Arbortext Advanced Print Publisher 11.2.5235/W-x64', 'source': 'bills/CELEX_32022R2065_EN_TXT.pdf', 'epsprocessor': 'PStill version 1.84.42', 'page': 69, 'total_pages': 102, 'producer': 'PDFlib+PDI 9.1.2p4 (C++/Win64)'}
Content: ser vice concer ned, including the prot ection of personal data, the protection of confidential inf or mation, in par ticular trade 
secrets, and maintaining the secur ity of their ser vice.
3. For the pur poses of paragraph 1, provid ers of ver y larg e online platf or ms or of ver y larg e online searc h engines shall, 
at the request of either the Digital Ser vice Coordinat or of establishment 