<a href="https://colab.research.google.com/github/Lister223/RAG_travelDemo_QA/blob/main/RAG_%E6%97%85%E9%81%8Ademo%E5%95%8F%E7%AD%94.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install openai
!pip install langchain
!pip install --upgrade --quiet  langchain-openai
!pip install -U langchain-community
!pip install chromadb
!pip install PyMuPDF
!pip install gradio==3.47.1

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.indexes import VectorstoreIndexCreator
from langchain_openai import OpenAIEmbeddings
from google.colab import userdata
from langchain.schema import Document
import fitz
import os
import gradio as gr

In [None]:
# Copy the OPENAI_API_KEY secret from Colab's userdata store into the process
# environment under the same name.
os.environ.update(OPENAI_API_KEY=userdata.get('OPENAI_API_KEY'))

In [None]:
# Define PDF text extraction
def read_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: The value handed to ``fitz.open``. ``None`` (no file selected)
            is reported as an error instead of being opened.

    Returns:
        The text of all pages joined in page order, or a message starting
        with ``"Error:"`` when no file was given or the file could not be
        read. ``process_and_store`` relies on that prefix to detect failures.
    """
    if file is None:
        return "Error: 尚未選擇文件。"
    try:
        with fitz.open(file) as doc:
            # join() avoids rebuilding the string once per page.
            return ''.join(page.get_text() for page in doc)
    except Exception as exc:
        # This runs as a UI callback: report the failure as text so the
        # caller can show it to the user instead of crashing the handler.
        return f"Error: 無法讀取PDF文件（{exc}）"

In [None]:
# Embedding model and vector-store index configuration
# NOTE(review): `model` and `deployment` name different embedding models —
# confirm which one is actually intended.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
    deployment="text-embedding-ada-002-1",
)

# Index creator that builds Chroma stores persisted under ./vector.
index_creator = VectorstoreIndexCreator(
    vectorstore_cls=Chroma,
    embedding=embeddings,
    vectorstore_kwargs=dict(persist_directory="./vector"),
)

In [None]:
# Define document storage
def process_and_store(file):
    """Read an uploaded PDF and add its text to the vector store.

    Args:
        file: The uploaded file, passed straight through to ``read_pdf``.

    Returns:
        A status message for the UI: the ``"Error:"`` message produced by
        ``read_pdf`` if reading failed, an ``"Error:"`` message if the PDF
        contains no extractable text, or the success message otherwise.
    """
    text = read_pdf(file)
    if text.startswith("Error:"):
        return text
    if not text.strip():
        # Nothing to embed (e.g. no file selected, or a PDF without a text
        # layer); skip indexing rather than storing an empty document.
        return "Error: 文件中沒有可擷取的文字，未寫入向量資料庫。"
    # The index creator works on Document objects, so wrap the raw text.
    index_creator.from_documents([Document(page_content=text)])
    print("向量資料庫已更新")
    return "文件已成功儲存到向量資料庫中。你可以開始提問了。"

In [None]:
# Open the Chroma store persisted under ./vector with the same embeddings
db = Chroma(
    embedding_function=embeddings,
    persist_directory='./vector',
)
# Retriever over db; each search is configured with k=3
retriever = db.as_retriever(search_kwargs=dict(k=3))

In [None]:
# System prompt: answering rules plus the {context} slot for the documents
system_prompt = '''
    1.Use the given context to answer the question.
    2.If you don't know the answer, say you don't know.
    3.Use three sentence maximum and keep the answer concise.
    4.不知道答案就回答「抱歉!我不清楚這個答案」，不要試圖編造答案。
    5.可參考上下文、歷史對話回覆使用者的問題
    6.你是一位旅遊助理，請根據景點資訊回覆使用者的問題
    7.你可以參考使用者上傳的文件內容回復。
    Context: {context}
    '''

# Two-message chat prompt: the system rules, then the user's {input}
prompt = ChatPromptTemplate.from_messages([("system", system_prompt), ("human", "{input}")])

# Chat model
llm = ChatOpenAI(model="gpt-4o")

# Document-stuffing answer chain, wrapped in a retrieval chain over `retriever`
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [None]:
# Define the question-answering function
def answer_question(question, history, stream):
    """Answer a chat question using retrieved documents and earlier turns.

    Args:
        question: The user's current message.
        history: Earlier turns as ``(user_message, assistant_reply)`` pairs;
            may be empty.
        stream: When true, yield progressively longer prefixes of the answer
            (one more character each time); otherwise yield it once.

    Yields:
        The answer text, either as growing prefixes or as a single value.
    """
    # Retrieve once on the raw question and reuse the documents for both the
    # debug print and the model call.
    documents = retriever.invoke(question)
    print("檢索結果:", documents)

    # The prompt only has {context} and {input} slots, and {context} is filled
    # with the retrieved documents, so the conversation history has to travel
    # inside the {input} text to reach the model.
    model_input = question
    if history:
        transcript = "\n".join(f"Human: {q}\nAssistant: {a}" for q, a in history)
        model_input = f"Conversation so far:\n{transcript}\n\nCurrent question: {question}"

    answer = question_answer_chain.invoke({"input": model_input, "context": documents})
    print("模型回答:", answer)

    if stream and answer:
        for end in range(1, len(answer) + 1):
            yield answer[:end]
    else:
        # Non-streaming, or an empty answer: still yield exactly one value.
        yield answer

In [None]:
# Gradio page: PDF upload with a storage-status box, plus the chat interface
with gr.Blocks() as demo:
    gr.Markdown("<p style='font-size:20px;'>歡迎您使用旅遊助理服務!!<br>請上傳您想提問的旅遊景點PDF文件。</p>")

    file_input = gr.File(type="file")
    store_output = gr.Textbox(label="儲存狀態")

    # process_and_store reads the PDF itself, so a single change handler is
    # enough; a separate read_pdf handler would only parse the file twice.
    file_input.change(fn=process_and_store, inputs=file_input, outputs=store_output)

    # The checkbox value is passed to answer_question as its `stream` argument.
    chatbot = gr.ChatInterface(
        fn=answer_question,
        additional_inputs=[gr.Checkbox(label='Stream', value=True)],
    )

demo.queue().launch(share=True)

IMPORTANT: You are using gradio version 3.47.1, however version 4.44.1 is available, please upgrade.
--------
IMPORTANT: You are using gradio version 3.47.1, however version 4.44.1 is available, please upgrade.
--------
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://5af8e847880db89623.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


