In [1]:
import os
import random
import time

import google.generativeai as genai
import gradio as gr
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI

In [2]:
# Hugging Face model used to embed both the indexed chunks and the queries.
# The same `embeddings` object is passed when the FAISS index is built and when
# it is reloaded, so changing this name means the index has to be rebuilt.
# (The original cell also assigned "keepitreal/vietnamese-sbert" to an unused
# variable called `model`; it was never loaded, and that name is later bound to
# the Gemini model, so the dead assignment is dropped here.)
EMBEDDING_MODEL_NAME = 'VoVanPhuc/sup-SimCSE-VietNamese-phobert-base'
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  warn_deprecated(
No sentence-transformers model found with name VoVanPhuc/sup-SimCSE-VietNamese-phobert-base. Creating a new one with mean pooling.


In [4]:
# Build the FAISS index from the PDF and persist it under ./vectordb.
# NOTE(review): the search output recorded further down cites 'Data/STSV.pdf'
# as the chunk source, not 'Data/Rule.pdf' -- confirm which file the persisted
# index is supposed to be built from.
loader = PyPDFLoader('Data/Rule.pdf')
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

# Load the PDF, then cut it into chunks (chunk_size=500, chunk_overlap=50).
document = loader.load()
chunks = splitter.split_documents(document)

# Embed every chunk and write the resulting index to disk.
vector_store = FAISS.from_documents(chunks, embedding=embeddings)
vector_store.save_local("vectordb")


Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 88 0 (offset 0)


In [24]:
# Reload the index persisted under ./vectordb.
# SECURITY: allow_dangerous_deserialization=True opts in to deserializing the
# saved index (per the LangChain docs this loads a pickle file). Only enable it
# for index files produced by this notebook / a trusted source.
vectorstore = FAISS.load_local(
    "vectordb",
    embeddings,
    allow_dangerous_deserialization=True,
)

# Retrieval sanity check: fetch the 3 chunks closest to a sample question.
question = "phòng công tác sinh viên ở đâu"
docs = vectorstore.similarity_search(query=question, k=3)
print(docs)

[Document(page_content='III. Các v ấn đề sinh viên gi ải quy ết tại các Viện ................................ ...................  20 \nIV. Góp ý, khi ếu nại, thắc mắc................................ ................................ ......................  20 \nV. H ướng d ẫn sinh viên th ực hiện quy ch ế đào tạo ................................ ............  21 \nVI. Cố vấn học tậ p và chủ nhiệm  lớp sinh viên  (trích)  ................................ ...... 33', metadata={'source': 'Data/STSV.pdf', 'page': 1}), Document(page_content='Quy định sinh viên cần lưu ý  ................................ ................................ .......................  11 \nPHẦN I: NH ỮNG QUY CH Ế, QUY ĐỊNH, TH Ủ TỤC SINH VIÊN C ẦN N ẮM \nVỮNG \nI. Nh ững v iệc sinh viên gi ải quy ết tại phòng Đào tạo ................................ ........  13 \nII. Nh ững vi ệc sinh viên gi ải quy ết tại phòng CTSV  ................................ ............  18', metadata={'source': 'Data/STSV.pdf', 'page': 

In [21]:
# Configure the Gemini client. The API key is read from the environment so it
# never appears in the notebook; a missing variable raises KeyError here.
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Every harm category uses the same blocking threshold, so the settings list is
# generated from the category names instead of four copy-pasted dicts.
GEMINI_MODEL_NAME = 'gemini-pro'
SAFETY_THRESHOLD = "BLOCK_MEDIUM_AND_ABOVE"
HARM_CATEGORIES = (
    "HARM_CATEGORY_HARASSMENT",
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
)
safety_settings = [
    {"category": category, "threshold": SAFETY_THRESHOLD}
    for category in HARM_CATEGORIES
]

# `model` is the module-level Gemini handle used by `gennerate` below.
model = genai.GenerativeModel(model_name=GEMINI_MODEL_NAME, safety_settings=safety_settings)

# Smoke test: one plain generation call to confirm the key and model work.
prompt_parts = [
  "Write a poetry about the beauty of nature"
]
response = model.generate_content(prompt_parts)
print(response.text)

**Ode to Nature's Grace**

In verdant meadows, where spring's caress,
Paints rainbows 'cross the azure dress;
Where wildflowers dance in vibrant hues,
A symphony of hues, nature's muse.

Beneath the spreading canopy of trees,
Where sunlight filters through with ease;
The rustling leaves whisper secrets old,
A gentle lullaby, a story to unfold.

By babbling brooks, where silver streams flow,
Where pebbles shimmer, a gentle glow;
The rippling water, a tranquil sight,
Reflecting clouds that drift in flight.

At twilight's hour, as day surrenders,
The sky ablaze with golden embers;
The moon ascends, a celestial glow,
Casting its spell on all below.

In mountains grand, where peaks pierce the sky,
Jagged summits reaching for the nigh;
Majestic eagles soar with wings unfurled,
A testament to nature's wondrous world.

From rolling hills to oceans wide,
Nature's splendor knows no divide;
In every nook and hidden place,
A tapestry of beauty, a gift to embrace.

So let us cherish this precious e

In [28]:
def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string; empty when ``docs`` is empty.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

def gennerate(question, k=5):
    """Answer ``question`` from the indexed documents (retrieve, then generate).

    Retrieves ``k`` chunks from the module-level ``vectorstore`` with an MMR
    search, joins them into a context block with ``format_docs``, and asks the
    module-level Gemini ``model`` to answer using only that context.

    Args:
        question: the user's question.
        k: number of chunks to retrieve (defaults to the original value, 5).

    Returns:
        The model's answer text, or a polite Vietnamese fallback message when
        the response carries no text.
    """
    retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": k})

    docs = retriever.get_relevant_documents(question)
    context = format_docs(docs)
    # Prompt fix: the third instruction read "nêu" where "nếu" ("if") is meant,
    # i.e. "answer that you don't know IF the context has no such information".
    prompt = f"""
    Bạn là trợ lý cho các nhiệm vụ trả lời câu hỏi, hãy trả lời bằng tiếng Việt, lịch sự và thân thiện.
    Hãy trả lời câu hỏi dựa trên dữ liệu có trong đoạn ngữ cảnh
    Hãy trả lời không biết nếu không có thông tin trong đoạn ngữ cảnh

    Đoạn ngữ cảnh: {context}

    Câu hỏi: {question}
    """
    response = model.generate_content(prompt)
    try:
        return response.text
    except ValueError:
        # `.text` raises ValueError when the response holds no text part (for
        # example when it was blocked by the safety settings); return a
        # friendly message instead of crashing the caller / chat UI.
        return "Xin lỗi, tôi không thể trả lời câu hỏi này."

# Try the retrieval + generation pipeline on one sample question.
sample_answer = gennerate("Phòng đào tạo là đơn vị nào")
print(sample_answer)

Tôi không biết thông tin đó.


In [9]:
# Minimal Gradio chat UI on top of `gennerate`.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=700)
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Handle one submitted message.

        Args:
            message: text typed into the textbox.
            chat_history: current list of (user, bot) pairs shown in the chatbot.

        Returns:
            A tuple ``("", updated_history)``: the empty string clears the
            textbox, the history is what the chatbot should display next.
        """
        # Work on a copy so the list handed in by Gradio is never mutated.
        history = list(chat_history or [])
        # Blank submissions would still cost a retrieval and an LLM call.
        if not message or not message.strip():
            return "", history
        bot_message = gennerate(message)
        history.append((message, bot_message))
        return "", history

    # Pressing Enter in the textbox sends (msg, chatbot) to `respond` and
    # writes its two return values back to the same components.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    demo.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.
