In [None]:
!pip install langchain-openai langgraph langchain_community pypdf gradio

Collecting langchain-openai
  Downloading langchain_openai-0.1.21-py3-none-any.whl.metadata (2.6 kB)
Collecting langgraph
  Downloading langgraph-0.2.3-py3-none-any.whl.metadata (13 kB)
Collecting langchain_community
  Downloading langchain_community-0.2.12-py3-none-any.whl.metadata (2.7 kB)
Collecting pypdf
  Downloading pypdf-4.3.1-py3-none-any.whl.metadata (7.4 kB)
Collecting gradio
  Downloading gradio-4.41.0-py3-none-any.whl.metadata (15 kB)
Collecting langchain-core<0.3.0,>=0.2.29 (from langchain-openai)
  Downloading langchain_core-0.2.30-py3-none-any.whl.metadata (6.2 kB)
Collecting openai<2.0.0,>=1.40.0 (from langchain-openai)
  Downloading openai-1.40.6-py3-none-any.whl.metadata (22 kB)
Collecting tiktoken<1,>=0.7 (from langchain-openai)
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting langgraph-checkpoint<2.0.0,>=1.0.2 (from langgraph)
  Downloading langgraph_checkpoint-1.0.2-py3-none-any.whl.metadata (4.6 k

In [None]:
!pip install langgraph-checkpoint-sqlite

Collecting langgraph-checkpoint-sqlite
  Downloading langgraph_checkpoint_sqlite-1.0.0-py3-none-any.whl.metadata (3.1 kB)
Collecting aiosqlite<0.21.0,>=0.20.0 (from langgraph-checkpoint-sqlite)
  Downloading aiosqlite-0.20.0-py3-none-any.whl.metadata (4.3 kB)
Downloading langgraph_checkpoint_sqlite-1.0.0-py3-none-any.whl (11 kB)
Downloading aiosqlite-0.20.0-py3-none-any.whl (15 kB)
Installing collected packages: aiosqlite, langgraph-checkpoint-sqlite
Successfully installed aiosqlite-0.20.0 langgraph-checkpoint-sqlite-1.0.0


In [None]:
from langchain_openai import ChatOpenAI
from langgraph.graph import add_messages, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition
from typing_extensions import TypedDict
from langgraph.checkpoint.sqlite import SqliteSaver
from pypdf import PdfReader
import os
import gradio as gr
from typing import Annotated

### State is a dictionary type that stores the conversation history. This will help the chatbot remember previous interactions.

### We define the State class that will hold the messages exchanged between the user and the chatbot.


In [None]:
class State(TypedDict):
    """Graph state passed between nodes: the conversation so far."""

    # `add_messages` (from langgraph.graph) is attached via Annotated as the
    # function LangGraph uses to merge a node's returned messages into this list.
    messages: Annotated[list, add_messages]

### This block creates a tool to extract text from PDFs and search through it.

### This class handles loading and processing PDFs from a directory, extracting text, and searching for specific queries within the text. It's designed to help the chatbot find relevant information in your PDF collection.

In [None]:
class PDFSearchTool:
    """Loads the text of every PDF in a directory and answers substring queries.

    All PDFs are read once, at construction time, and their extracted text is
    kept in memory in ``self.pdf_texts`` (a ``{filename: text}`` dict).
    Instances are callable: ``tool(query)`` is the same as ``tool.search(query)``.
    """

    def __init__(self, pdf_dir):
        """Read every PDF found directly inside ``pdf_dir``.

        Args:
            pdf_dir: Path of the directory to scan (not recursive).
        """
        self.pdf_dir = pdf_dir
        self.pdf_texts = self.load_pdfs()

    def load_pdfs(self):
        """Return a ``{filename: extracted_text}`` dict for the PDFs in ``self.pdf_dir``."""
        pdf_texts = {}
        # os.listdir() order is arbitrary; sorting keeps result order stable between runs.
        for filename in sorted(os.listdir(self.pdf_dir)):
            # Compare the extension case-insensitively so "REPORT.PDF" is not skipped.
            if filename.lower().endswith(".pdf"):
                filepath = os.path.join(self.pdf_dir, filename)
                pdf_texts[filename] = self.extract_text_from_pdf(filepath)
        return pdf_texts

    def extract_text_from_pdf(self, filepath):
        """Return the concatenated text of all pages of the PDF at ``filepath``."""
        reader = PdfReader(filepath)
        # `or ""` guards against a page for which no text could be extracted.
        return "".join(page.extract_text() or "" for page in reader.pages)

    def search(self, query, snippet_chars=500):
        """Find documents whose text contains ``query`` (case-insensitive).

        Args:
            query: Literal phrase to look for. The whole string is matched as
                one substring; it is not split into keywords.
            snippet_chars: Maximum length of the snippet returned per document.

        Returns:
            ``{"results": [{"filename": str, "snippet": str}, ...]}`` with one
            entry per matching document. The snippet is a window of the
            document text around the first match. An empty or whitespace-only
            query returns no results.
        """
        # An empty string is a substring of everything; treat it as "no query".
        if not query or not query.strip():
            return {"results": []}

        needle = query.lower()
        # Characters of context to show before the match (roughly centres it).
        lead = max(0, (snippet_chars - len(needle)) // 2)

        results = []
        for filename, text in self.pdf_texts.items():
            # Offsets come from the lowercased text; for the rare characters
            # whose lowercase form has a different length the window may be
            # shifted slightly, which is acceptable for a snippet.
            pos = text.lower().find(needle)
            if pos == -1:
                continue
            start = max(0, pos - lead)
            # Near the end of the document, slide the window back so the
            # snippet is still as long as possible.
            start = max(0, min(start, len(text) - snippet_chars))
            results.append({
                "filename": filename,
                "snippet": text[start:start + snippet_chars],
            })
        return {"results": results}

    def __call__(self, query):
        """Alias for :meth:`search` so the instance can be used like a function."""
        return self.search(query)

In [None]:
# Initialize PDF Search Tool.
# The constructor reads every PDF in pdf_dir immediately, so this cell's
# run time grows with the number and size of the PDFs in that folder.
pdf_tool = PDFSearchTool(pdf_dir="/content/")  # Replace with the path to your PDF folder
 # Wrapper function
def pdf_search_tool(query: str) -> dict:
    """Search the loaded PDF documents for a word or short phrase.

    The docstring and type hint matter here: they are what the tool schema
    sent to the model is built from, so without them the model receives an
    empty description and an untyped parameter.

    Args:
        query: Word or short phrase to look for. It is matched as a
            case-insensitive literal substring of each document's text.

    Returns:
        A dict of the form
        ``{"results": [{"filename": ..., "snippet": ...}, ...]}`` with one
        entry per PDF whose text contains the query.
    """
    return pdf_tool(query)

In [None]:
# Sanity check. The whole sentence is matched as one literal substring, so this
# returns no results unless a PDF contains exactly this phrase.
pdf_search_tool("Give a Description of sample response files")

{'results': []}

In [None]:
# Tools exposed to the LLM: the plain wrapper function, not the PDFSearchTool instance.
tools =[pdf_search_tool]

In [None]:
# Display the tool list to confirm what will be bound to the LLM.
tools

[<function __main__.pdf_search_tool(query)>]

### Initialize LLM

In [None]:
# !pip install langchain_openai

In [None]:
from langchain_openai import AzureChatOpenAI
# !pip install python-dotenv
# Read the Azure OpenAI key from the environment instead of committing it to the notebook.
# NOTE(review): the key that was previously hardcoded here should be treated as leaked and rotated.
api_key = os.environ.get("AZURE_OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the AZURE_OPENAI_API_KEY environment variable before running this cell."
    )

In [None]:
# Chat model client for the Azure OpenAI deployment used by the chatbot.
llm = AzureChatOpenAI(
    openai_api_version="2023-03-15-preview",  # e.g., "2023-12-01-preview"
    azure_deployment="sparcgpt4-32",  # name of the deployment in the Azure resource
    api_key=api_key,
    azure_endpoint = "https://hclsparcaimodel.openai.azure.com/",
    temperature=0,  # 0 keeps sampling randomness to a minimum
)

### We bind the PDF search tool to the Azure OpenAI chat model created above, so the chatbot can call the tool when generating responses.

In [None]:
# Bind the tools to the LLM: the returned runnable sends the tool schemas with
# every request so the model can ask for a tool call in its reply.
llm_with_tools = llm.bind_tools(tools)

In [None]:
# Inspect the bound runnable; its repr includes the tool schema passed to the model.
llm_with_tools

RunnableBinding(bound=AzureChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x796a65d56bf0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x796a65d57160>, temperature=0.0, openai_api_key=SecretStr('**********'), openai_proxy='', azure_endpoint='https://hclsparcaimodel.openai.azure.com/', deployment_name='sparcgpt4-32', openai_api_version='2023-03-15-preview', openai_api_type='azure'), kwargs={'tools': [{'type': 'function', 'function': {'name': 'pdf_search_tool', 'description': '', 'parameters': {'type': 'object', 'properties': {'query': {}}, 'required': ['query']}}}]})

### Build the Chatbot Function

We define the chatbot function that processes user inputs and generates responses.

This function takes the current conversation (state) and generates a response using the language model, possibly using the PDF search tool if relevant.

In [None]:
def chatbot(state: State):
    """Graph node: send the conversation to the tool-enabled LLM.

    Args:
        state: Current graph state; only ``state["messages"]`` is read.

    Returns:
        A state update ``{"messages": [reply]}`` holding the model's single reply.
    """
    conversation = state["messages"]
    reply = llm_with_tools.invoke(conversation)
    return {"messages": [reply]}

In [None]:
# Minimal single-turn conversation used to smoke-test the chatbot node.
test_state = dict(messages=[("user", "what is AI?")])

In [None]:
# Smoke test: run the chatbot node once on the sample state (this calls the LLM).
chatbot(test_state)['messages']

[AIMessage(content="AI, or Artificial Intelligence, refers to the simulation of human intelligence in machines that are programmed to think like humans and mimic their actions. The term may also be applied to any machine that exhibits traits associated with a human mind such as learning and problem-solving. The ideal characteristic of artificial intelligence is its ability to rationalize and take actions that have the best chance of achieving a specific goal. AI can be categorized as either weak or strong. Weak AI, also known as narrow AI, is an AI system that is designed and trained for a particular task. Virtual personal assistants, such as Apple's Siri, are a form of weak AI. Strong AI, also known as artificial general intelligence, is an AI system with generalized human cognitive abilities.", response_metadata={'token_usage': {'completion_tokens': 148, 'prompt_tokens': 43, 'total_tokens': 191}, 'model_name': 'gpt-4-32k', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprob

### Create the StateGraph and Add Memory

### We create a StateGraph to manage the chatbot’s flow and add memory using a SQLite checkpointer. Because the database is in-memory (`:memory:`), the chatbot remembers the conversation across interactions only while the kernel is running.


In [None]:
def execute():
    """Build and compile the chatbot graph with tool support and conversation memory.

    The graph has two nodes: "chatbot" (the LLM) and "tools" (runs whatever
    tool the LLM asked for). After a tool runs, control goes back to the
    chatbot so it can answer using the tool output.

    Returns:
        The compiled graph, checkpointed to an in-memory SQLite database.
    """
    # Function-scope import keeps this cell self-contained; sqlite3 is standard library.
    import sqlite3

    # Build the checkpointer from an explicit connection. In
    # langgraph-checkpoint-sqlite 1.x, SqliteSaver.from_conn_string() is a
    # context manager, so its bare return value is not a usable checkpointer.
    # check_same_thread=False because the web UI may call the graph from a
    # thread other than the one that created the connection.
    conn = sqlite3.connect(":memory:", check_same_thread=False)
    memory = SqliteSaver(conn)

    graph_builder = StateGraph(State)
    graph_builder.add_node("chatbot", chatbot)

    # Enhancing the Chatbot with Tools.
    # Use the same named callables that were bound to the LLM. Passing the
    # PDFSearchTool instance here raised
    # "AttributeError: 'PDFSearchTool' object has no attribute '__name__'".
    tool_node = ToolNode(tools=tools)
    graph_builder.add_node("tools", tool_node)
    graph_builder.add_conditional_edges("chatbot", tools_condition)
    graph_builder.add_edge("tools", "chatbot")

    graph_builder.set_entry_point("chatbot")
    graph_builder.set_finish_point("chatbot")

    # Adding Memory to the Chatbot.
    # No interrupt_after=["tools"]: nothing in this notebook resumes an
    # interrupted run, so pausing after the tool node would stop each turn
    # before the model could answer with the tool output.
    graph = graph_builder.compile(checkpointer=memory)
    return graph

###  Create a Gradio Interface
The Gradio interface sends each user message to the compiled graph and shows the chatbot's reply. Conversation history is kept by the in-memory SQLite checkpointer configured above, so it is lost when the kernel restarts.

In [None]:
def interact_with_chatbot(user_input, graph, config):
    """Run one user turn through the graph and return the final reply text.

    Args:
        user_input: The text typed by the user.
        graph: Compiled graph exposing ``stream(inputs, config, stream_mode=...)``.
        config: Run configuration passed through to ``graph.stream``.

    Returns:
        The ``content`` of the last message in the last streamed state, or an
        empty string if the stream produced no state with messages.
    """
    final_state = None
    # With stream_mode="values" each event is the full state after a step, and
    # the first event is the state holding only the new user message. Returning
    # inside the loop would therefore echo the user's input, so drain the
    # stream and keep the last state instead.
    for final_state in graph.stream(
        {"messages": [("user", user_input)]}, config, stream_mode="values"
    ):
        pass

    if not final_state or not final_state.get("messages"):
        return ""
    return final_state["messages"][-1].content

def launch_gradio():
    """Build the chatbot graph and serve it through a simple Gradio text UI.

    Every request uses the same config (thread_id "1").
    """
    compiled_graph = execute()
    thread_config = {"configurable": {"thread_id": "1"}}

    def respond(user_input):
        # Closure so Gradio only has to supply the text box value.
        return interact_with_chatbot(user_input, compiled_graph, thread_config)

    demo = gr.Interface(
        fn=respond,
        inputs="text",
        outputs="text",
        title="Chatbot with PDF Integration",
    )
    demo.launch()

# Start the UI: builds the graph and launches the Gradio interface.
launch_gradio()

AttributeError: 'PDFSearchTool' object has no attribute '__name__'