<a href="https://colab.research.google.com/github/DinhVinh2404/Books_Agent/blob/main/Book_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip install -q langchain langgraph langchain-community langchain-google-genai sqlalchemy ipython-sql langchain-text-splitters pypdf

In [13]:
# List the installed versions of the packages whose names match the pattern below.
!pip list | egrep 'langchain|langgraph|sqlalchemy|pypdf|faiss|ipython'

ipython                                  7.34.0
ipython-genutils                         0.2.0
ipython-sql                              0.5.0
langchain                                0.3.27
langchain-community                      0.3.31
langchain-core                           0.3.79
langchain-google-genai                   2.1.12
langchain-text-splitters                 0.3.11
langgraph                                1.0.1
langgraph-checkpoint                     3.0.0
langgraph-prebuilt                       1.0.1
langgraph-sdk                            0.2.9
pypdf                                    6.1.3
sqlalchemy-spanner                       1.17.0


In [2]:
from google.colab import userdata
import os
# Only fall back to the Colab `userdata` store when the variable is unset or empty.
if not os.getenv("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")

**CREATE TABLES**

In [3]:
import sqlite3

db_path = "/content/books.db"
# Rebuild the demo database from scratch on every run so the seed INSERTs
# below never collide with rows left over from an earlier execution.
if os.path.exists(db_path):
    os.remove(db_path)

conn = sqlite3.connect(db_path)
try:
    # Used as a context manager, the connection commits when the block succeeds
    # and rolls back the pending transaction if any statement raises.
    with conn:
        conn.execute('''
          CREATE TABLE IF NOT EXISTS Books (
            book_id INTEGER PRIMARY KEY,
            title TEXT,
            author TEXT,
            price REAL,
            stock INTEGER,
            category TEXT
          )
        ''')

        conn.execute('''
          CREATE TABLE IF NOT EXISTS Orders (
            order_id INTEGER PRIMARY KEY,
            customer_name TEXT,
            phone TEXT,
            address TEXT,
            book_id INTEGER,
            quantity INTEGER,
            status TEXT,
            FOREIGN KEY (book_id) REFERENCES Books(book_id)
          )
        ''')

        # Explicit column list + bound parameters: the seed data no longer depends
        # on the physical column order of the table.
        conn.executemany(
            "INSERT INTO Books (book_id, title, author, price, stock, category) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            [
                (1, 'Chatbot', 'Vinh', 24.04, 1, 'Test'),
                (2, 'User manual', 'Vinh2', 2003, 5, 'Manual'),
            ],
        )
finally:
    # Always release the database file, even when table creation or seeding fails.
    conn.close()



In [31]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.prebuilt import create_react_agent
from langchain_community.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase
from langchain.tools import Tool

RAG

In [6]:
from pypdf import PdfReader
import glob
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.tools import StructuredTool
from pydantic import BaseModel, Field

In [34]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [8]:
pdf_folder = "/content/pdf_books"
# Folder scanned for source PDFs when the vector store is built.
# exist_ok=True makes the call a no-op when the directory is already there,
# replacing the separate exists-then-create check.
os.makedirs(pdf_folder, exist_ok=True)

In [9]:
# Wrap the SQLite file at db_path in LangChain's SQLDatabase helper.
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")
# Chat model handle; the same object is later passed to the SQL toolkit, the QA chain and the agent.
model = ChatGoogleGenerativeAI(model='gemini-2.5-flash')

In [14]:
# Install the FAISS package used by the FAISS vector store; the CPU build is the active choice.
%pip install -q faiss-cpu
# %pip install -q faiss-gpu

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m60.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [15]:
# Keyword arguments forwarded to HuggingFaceEmbeddings below.
# NOTE(review): 'normalize_embeddings' presumably makes the encoder return unit-length vectors — confirm against the embeddings backend.
encode_kwargs = {'normalize_embeddings': True}
model_kwargs = {'device': 'cpu'}
# Embedding model used when the FAISS index is built (BAAI/bge-m3 with the kwargs above).
embedding_model = HuggingFaceEmbeddings(
    model_name="BAAI/bge-m3",
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

def build_vectostore_for_books():
    """Build one FAISS index over every PDF found directly inside ``pdf_folder``.

    Each PDF is loaded with ``PyPDFLoader`` and split with a
    ``RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)``;
    all resulting chunks are indexed with the module-level ``embedding_model``.

    Returns:
        The vector store returned by ``FAISS.from_documents``.

    Raises:
        ValueError: If no chunks were produced (no PDF files in the folder, or
            none with loadable content). Raised here because building an index
            from an empty list fails later with a much less helpful error.
    """
    # The splitter is configured identically for every file, so build it once.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

    all_docs = []
    # os.listdir returns entries in arbitrary order; sorting keeps the index
    # construction order stable between runs.
    for book in sorted(os.listdir(pdf_folder)):
        # Accept upper-case extensions (".PDF") as well.
        if not book.lower().endswith(".pdf"):
            continue
        loader = PyPDFLoader(os.path.join(pdf_folder, book))
        all_docs.extend(text_splitter.split_documents(loader.load()))

    if not all_docs:
        raise ValueError(
            f"No PDF content found in {pdf_folder!r}; "
            "add at least one readable PDF before building the index."
        )
    return FAISS.from_documents(all_docs, embedding_model)

# Build the index when this cell runs and expose it as the retriever consumed by the QA chain.
vectorstores = build_vectostore_for_books()
retriever = vectorstores.as_retriever()

# System instructions for the retrieval QA chain, followed by the {context}
# placeholder that the chat prompt template fills in.
# NOTE(review): the last instruction has no closing period and reads awkwardly;
# it is left untouched so the prompt text stays byte-identical.
system_prompt = "\n\n".join(
    [
        " ".join(
            [
                "You are an assistant for question-answering tasks.",
                "Use the following pieces of retrieved context to answer the question.",
                "If you don't know the answer, say that you don't know.",
                "Use three sentences maximum and keep the answer concise.",
                "Always answer the same language with user query",
            ]
        ),
        "{context}",
    ]
)

# Chat prompt: the system message carries system_prompt (with its {context} slot),
# the human message carries the {input} slot.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# Combine model + prompt into a document chain, then pair it with the retriever.
# book_rag invokes qa_chain with an "input" key.
question_answer_chain = create_stuff_documents_chain(model, prompt)
qa_chain = create_retrieval_chain(retriever, question_answer_chain)


In [16]:
# Argument schema for the book_rag tool: a single required free-text question.
class BookQueryInput(BaseModel):
    query: str = Field(..., description="User's question about the book content in stock, e.g., 'What is Chapter 2 of Chatbot about?'")

def book_rag(query: str) -> str:
    """Answer a question about the stocked books through the retrieval QA chain.

    Args:
        query: The user's question; passed to ``qa_chain`` under the ``"input"`` key.

    Returns:
        A two-line string: ``"Answer: ..."`` followed by ``"Sources: [...]"``,
        where the list holds the ``source`` metadata of each context document
        (an empty string when a document has none).
    """
    result = qa_chain.invoke({"input": query})

    # Prefer the "answer" key, fall back to "result", then to a fixed message.
    if "answer" in result:
        answer = result["answer"]
    else:
        answer = result.get("result", "No answer found.")

    sources = []
    for document in result.get("context", []):
        sources.append(document.metadata.get("source", ""))

    return f"Answer: {answer}\nSources: {sources}"

# Expose book_rag to the agent as a structured tool whose arguments follow BookQueryInput.
book_rag_tool = StructuredTool.from_function(
    func=book_rag,
    args_schema=BookQueryInput,
    description="Search and retrieve the content of books in stock based on PDF files located in pdf_folder.")

**SQL**

In [18]:
from pydantic import BaseModel, Field
from langchain.tools import StructuredTool

# Argument schema for the order tool; the fields mirror the parameters of oder().
# NOTE(review): status is free text supplied by the caller — no set of allowed values is enforced here.
class OrderInput(BaseModel):
    customer_name: str = Field(..., description="Customer name")
    phone: str = Field(..., description="Customer phone number")
    address: str = Field(..., description="Customer address")
    book_id: int = Field(..., description="Book id")
    quantity: int = Field(..., description="Quantity to order")
    status: str = Field(..., description="Order status")

def oder(customer_name: str, phone: str, address: str, book_id: int, quantity: int, status: str):
    """Insert one row into ``Orders`` after checking the book exists and has enough stock.

    Args:
        customer_name: Name stored on the order.
        phone: Phone number stored on the order.
        address: Delivery address stored on the order.
        book_id: ``Books.book_id`` of the book being ordered.
        quantity: Number of copies requested; must be greater than zero.
        status: Status text stored on the order as given.

    Returns:
        A human-readable message: a quantity error, "not found", "not enough
        stock", or ``"Order placed successfully."``.
    """
    # Reject zero/negative quantities up front: the stock comparison below would
    # accept them, because stock is never smaller than a non-positive number.
    if quantity <= 0:
        return f"Quantity must be a positive integer, got {quantity}"

    conn = sqlite3.connect(db_path)
    try:
        # Fetch only the column that is needed instead of indexing into SELECT *.
        row = conn.execute(
            "SELECT stock FROM Books WHERE book_id = ?", (book_id,)
        ).fetchone()
        if row is None:
            return f"Book with ID {book_id} not found"
        if row[0] < quantity:
            return f"Not enough stock for book with ID {book_id}"

        # NOTE(review): Books.stock is not decremented here, so repeated orders can
        # add up to more than the available stock — confirm whether that is intended.
        # order_id is omitted so SQLite assigns it, as the original NULL value did.
        conn.execute(
            "INSERT INTO Orders (customer_name, phone, address, book_id, quantity, status) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            (customer_name, phone, address, book_id, quantity, status),
        )
        conn.commit()
    finally:
        # Runs on every exit path (early return, success, exception) so the
        # connection is never leaked.
        conn.close()
    return "Order placed successfully."

# Expose oder() to the agent as a structured tool whose arguments follow OrderInput.
order_tool = StructuredTool.from_function(
    func=oder,
    args_schema=OrderInput,
    description="Place an order into the Orders table."
)

In [None]:
# Instruction text for the agent; it is wrapped in a SystemMessage and passed as the agent prompt.
SQL_PREFIX = """You are an agent designed to interact with a SQL database.
Given an input question, create a syntactically correct SQLite query to run, then look at the results of the query and return the answer.
Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most 2 results.
You can order the results by a relevant column to return the most interesting examples in the database.
Never query for all the columns from a specific table, only ask for the relevant columns given the question.
You have access to tools for interacting with the database.
Only use the below tools. Only use the information returned by the below tools to construct your final answer.
You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again.

DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the books table in database. Can make any changes to the orders table in database

To start you should ALWAYS look at the tables in the database to see what you can query.
Do NOT skip this step.
Then you should query the schema of the most relevant tables."""

# Wrap the instructions in a system message and give the agent every tool it may
# call: the SQL toolkit's tools first, then the order tool and the book RAG tool.
system_message = SystemMessage(content=SQL_PREFIX)
toolkit = SQLDatabaseToolkit(db=db, llm=model)
tools = [*toolkit.get_tools(), order_tool, book_rag_tool]
agent_executor = create_react_agent(model=model, tools=tools, prompt=system_message)

In [36]:
def _message_text(content) -> str:
    """Flatten message content (a string, or a list of string/dict parts) into plain text."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for part in content:
            if isinstance(part, str):
                parts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                parts.append(part["text"])
        return "".join(parts)
    return ""


def ask_agent(agent_executor, query: str):
    """Run one user query through the agent and return the last non-blank message text.

    Args:
        agent_executor: Object exposing ``invoke``; it is called with
            ``{"messages": [HumanMessage(content=query)]}`` and must return a
            mapping that has a ``"messages"`` sequence.
        query: The user's question.

    Returns:
        The content of the most recent message whose text is not blank, or
        ``None`` when no message has any. String content is returned unchanged;
        list-shaped content is reduced to its text parts instead of raising
        ``AttributeError`` on ``.strip()``.
    """
    result = agent_executor.invoke({"messages": [HumanMessage(content=query)]})
    # Walk backwards so the newest message with actual text wins.
    for msg in reversed(result["messages"]):
        text = _message_text(getattr(msg, "content", None))
        if text.strip():
            return text
    return None

**VERIFY**

In [None]:
# Print the table description text exposed by the SQLDatabase wrapper.
print(db.table_info)

In [None]:
import time
# Stream the agent's updates for an order of 5 copies of "Chatbot" (the seed data stocks only 1).
# NOTE(review): the 5-second pause per update is presumably there to stay under API rate limits — confirm.
for s in agent_executor.stream(
    {"messages": [HumanMessage(content="I want to order 5 copy of Chatbot. My name is VinhDV, phone 012345678, address Hanoi")]}
):
    time.sleep(5)
    print(s)
    print("----")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'function_call': {'name': 'sql_db_list_tables', 'arguments': '{}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': [], 'grounding_metadata': {}, 'model_provider': 'google_genai'}, id='lc_run--24ccd980-458c-43d5-9aac-7eb6690fde51-0', tool_calls=[{'name': 'sql_db_list_tables', 'args': {}, 'id': 'ef723a53-2c6f-4e62-a524-08ea02624b58', 'type': 'tool_call'}], usage_metadata={'input_tokens': 777, 'output_tokens': 99, 'total_tokens': 876, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 85}})]}}
----
{'tools': {'messages': [ToolMessage(content='Books, Orders', name='sql_db_list_tables', id='23d371ad-d4c1-42af-bc52-fae4f5d4f233', tool_call_id='ef723a53-2c6f-4e62-a524-08ea02624b58')]}}
----
{'agent': {'messages': [AIMessage(content='', additional_kwargs={'function_call': {'name': 'sql_db_sc

In [None]:
import time
# Stream the agent's updates for an order of 1 copy of "Chatbot", which matches the seeded stock.
# NOTE(review): the 5-second pause per update is presumably there to stay under API rate limits — confirm.
for s in agent_executor.stream(
    {"messages": [HumanMessage(content="I want to order 1 copy of Chatbot. My name is VinhDV, phone 012345678, address Hanoi")]}
):
    time.sleep(5)
    print(s)
    print("----")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'function_call': {'name': 'sql_db_list_tables', 'arguments': '{}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': [], 'grounding_metadata': {}, 'model_provider': 'google_genai'}, id='lc_run--fdda23d1-51d2-4a9b-82be-283ec26cfc15-0', tool_calls=[{'name': 'sql_db_list_tables', 'args': {}, 'id': 'd6907f9b-46d7-4521-b4b9-76420007d60d', 'type': 'tool_call'}], usage_metadata={'input_tokens': 777, 'output_tokens': 100, 'total_tokens': 877, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 86}})]}}
----
{'tools': {'messages': [ToolMessage(content='Books, Orders', name='sql_db_list_tables', id='1b752da4-46ad-4250-9b2c-f130762a0837', tool_call_id='d6907f9b-46d7-4521-b4b9-76420007d60d')]}}
----
{'agent': {'messages': [AIMessage(content='', additional_kwargs={'function_call': {'name': 'sql_db_s

In [None]:
import time
# Stream the agent's updates for a content question asked in Vietnamese
# ("What is the main content of the Chatbot book?").
# NOTE(review): the 5-second pause per update is presumably there to stay under API rate limits — confirm.
for s in agent_executor.stream(
    {"messages": [HumanMessage(content="Sách Chatbot có nội dung chính là gì?")]}
):
    time.sleep(5)
    print(s)
    print("----")

{'agent': {'messages': [AIMessage(content='', additional_kwargs={'function_call': {'name': 'book_rag', 'arguments': '{"query": "S\\u00e1ch Chatbot c\\u00f3 n\\u1ed9i dung ch\\u00ednh l\\u00e0 g\\u00ec?"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='run--36ccbd41-01b8-4e7b-8bab-17228e9aff12-0', tool_calls=[{'name': 'book_rag', 'args': {'query': 'Sách Chatbot có nội dung chính là gì?'}, 'id': 'e5bb1b9a-1e0a-4b9d-848b-abef5d5cec76', 'type': 'tool_call'}], usage_metadata={'input_tokens': 838, 'output_tokens': 87, 'total_tokens': 925, 'input_token_details': {'cache_read': 684}, 'output_token_details': {'reasoning': 62}})]}}
----
{'tools': {'messages': [ToolMessage(content='Answer: Sách "Cẩm Nang Chatbot" cung cấp kiến thức toàn diện về chatbot, từ định nghĩa cơ bản và cách chúng hoạt động đến các ứng dụng thực tiễn trong kinh doanh, đặc biệt là dịch vụ khách hàng và 

In [37]:
# Content question in Vietnamese: "What is the main content of the user manual book?"
query = "Sách user manual có nội dung chính là gì?"
answer = ask_agent(agent_executor, query)
print(answer)

Sách "Hướng Dẫn Sử Dụng Nền Tảng Trợ Lý Ảo" giới thiệu về nền tảng trợ lý ảo, mục tiêu và công nghệ của nó. Sách cũng hướng dẫn chi tiết cách đăng ký, đăng nhập tài khoản, và cách khắc phục lỗi hoặc liên hệ hỗ trợ khi cần thiết.


In [None]:
# Order request without customer details: "I want to order 5 chatbot books"
query = "Tôi muốn đặt 5 sách chatbot"
answer = ask_agent(agent_executor, query)
print(answer)

Bạn muốn đặt 5 sách chatbot. Để đặt hàng tôi cần thông tin của bạn. Vui lòng cung cấp tên, số điện thoại và địa chỉ của bạn.


In [None]:
# Same 5-copy order, now with name, phone and address; the seed data stocks only 1 copy of "Chatbot".
query = "Tôi muốn đặt 5 sách chatbot. Tên tôi là Vinh, sdt 012345678, địa chỉ Hà Nội"
answer = ask_agent(agent_executor, query)
print(answer)

Xin lỗi, hiện tại sách Chatbot không đủ số lượng để bạn đặt 5 cuốn. Chỉ còn 1 cuốn trong kho. Bạn có muốn đặt 1 cuốn không?


In [39]:
# Order of 2 copies of "User manual" with full customer details; the seed data stocks 5.
query = "Tôi muốn đặt 2 sách user manual. Tên tôi là Vinh, sdt 012345678, địa chỉ Hà Nội"
answer = ask_agent(agent_executor, query)
print(answer)

Bạn đã đặt 2 sách user manual thành công.


In [40]:
# Catalogue question in Vietnamese: "What book categories are in stock?"
query = "Trong kho có những thể loại sách gì"
answer = ask_agent(agent_executor, query)
print(answer)

Trong kho có các thể loại sách là: Test, Manual.
