In [1]:
!pip install -q ctransformers
!pip install -q transformers
!pip install -q langchain
!pip install -q langchain-community
!pip install -q torch
!pip install -q pypdf
!pip install -q sentence-transformers
!pip install -q gpt4all
!pip install -q faiss-cpu
!pip install -q openai
!pip install -q bitsandbytes accelerate
!pip install -q -U bitsandbytes accelerate gpt4all

In [2]:
from langchain.chains import RetrievalQA, LLMChain
from langchain.prompts import PromptTemplate
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from langchain_community.llms import CTransformers
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import torch

# **Embedding and Store vector DB**

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings


In [4]:
!ls -a /content

.  ..  .config	drive  sample_data  vectorstores


In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# Khai bao bien
data_path = "/content/drive/MyDrive/Data"
vector_db_path = "./vectorstores"

In [81]:
loader = DirectoryLoader(data_path, glob="*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()
splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=100,
)

texts = splitter.split_documents(documents)

In [56]:
print(len(documents))
print(len(texts))

10
38


In [57]:
len(texts)

38

In [58]:
texts[10]

Document(metadata={'source': '/content/drive/MyDrive/Data/Vietnamese news.pdf', 'page': 2}, page_content="height that is more intensive and practical in all aspects, he remarked, stressing that Vietnam is a priority \nin China’s foreign policy and that China su pports Vietnam’s perseverance in the Party’s leadership and \npromotion of the socialism building.  \nOn behalf of the Vietnamese Party, State, and people, Party General Secretary and President Lam \nthanked the China's Party Central Committee for sending condolences and holding many special acts \nover the passing of General Secretary Trong. He considered the  moves a strong demonstration of the \nheight of the two Parties and two countries’ relations, along with Xi’s special sentiments towards the late \nleader.  \nParty General Secretary and President Lam noted that he hopes to join hands with General Secretary and \nPresident Xi along with other high -ranking leaders of the Chinese Party and State to continue nurturing \nthe

In [59]:
embedding_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = {'device': 'cuda'}
encode_kwargs = {'normalize_embeddings': True}

embeddings = HuggingFaceEmbeddings(
    model_name=embedding_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

In [82]:
db = FAISS.from_documents(texts, embeddings)
db.save_local(vector_db_path)

# **Retriver**

In [61]:
from langchain_community.llms import CTransformers
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline


In [62]:
def load_llm(model_file):
    llm = CTransformers(
        model=model_file,
        model_type="llama",
        max_new_tokens=1024,
        temperature=0.01
    )
    return llm


In [63]:
# Cau hinh
model_file = "/content/drive/MyDrive/model/vinallama-2.7b-chat_q5_0.gguf"
vector_db_path = "./vectorstores"

llm = load_llm(model_file)

In [84]:
retriever = db.as_retriever(search_kwargs={"k": 3}, max_tokens_limit=512)
docs = retriever.get_relevant_documents("President To Lam becomes Vietnam's Party General Secretary")

In [65]:
docs[0]

Document(metadata={'source': '/content/drive/MyDrive/Data/Vietnamese news.pdf', 'page': 6}, page_content='of Russia and Vietnam, contributing to firmly strengthening security and stability in the Asia -Pacific," the \nRussian president wrote.  \nVietnam\'s President To Lam, 67, won 100% votes to be elected the Party General Secretary for the 2021 -\n2026 term by the 13th Party Central Committee at a meeting on Saturday.  \nHe is a member of the Party Central Committee for the 11th, 12th, and 13th terms (from January 2011 \nuntil now); a member of the Politburo for the 12th and 13th terms (from January 2016 until now); and a \nNational Assembly delegate for the 14th and 15th tenu res. \nIn early 2019, he was promoted by General Secretary and President Nguyen Phu Trong to the rank of \ngeneral, becoming the fourth person in the history of Vietnamese People\'s Public Security to hold the \nhighest military rank.  \nOn May 22, when he was Minister of Public Security, To Lam was elected sta

In [66]:
docs[1]

Document(metadata={'source': '/content/drive/MyDrive/Data/Vietnamese news.pdf', 'page': 8}, page_content="greater benefits to their people. He wished Vietnam prosperity and happiness, and Lam greater \nachievements in his new position.  \nKing of Cambodia Norodom Sihamoni expressed his belief that under the leadership of President Lam, \nVietnam will achieve great success and rapid developments in all fields, and maintain its important role \nin the region and the world.  \nThe monarch said he hopes the bonds between the people of the two countries will grow stronger for \npeace, stability, cooperation and development.  \nPresident To Lam becomes Vietnam's Party General Secretary  \nPresident To Lam won 100% votes to be elected the Communist Party's General Secretary for the 2021 -\n2026 term by the 13th Party Central Committee at a meeting on Saturday.  \nA press statement released by the Central Party Committee Office after the conference said that based \non the Party's regulations 

In [77]:
#Tao Prompt
template = """system
You are a knowledgeable assistant tasked with providing accurate and concise answers based on the information provided. If the answer is not in the given context, respond with 'I don't know' rather than making up an answer.
Context:
{context}
user
{question}
assistant"""

prompt = PromptTemplate(template=template)

# **Make a Chain**

In [85]:
llm_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False,
    chain_type_kwargs={'prompt': prompt}
)


In [87]:
question = "Who is Vietnam's Party General Secretary?"
response = llm_chain.invoke(question)
print(response)



{'query': "Who is Vietnam's Party General Secretary?", 'result': '\nGeneral Secretary To Lam is the Party General Secretary of Vietnam. \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'}


In [88]:
question = "What is the day that Vietnam's Communist Party General Secretary and\
 State President To Lam and his spouse will pay a state\
visit to China?"
response = llm_chain.invoke(question)
print(response['result'])




The Vietnamese President, To Lam, and his spouse will pay a state visit to China from August 18-20. 

To Lam is the name of the Vietnamese President, who was elected as the Communist Party General Secretary for the 2021-22 term by the 13th Party Central Committee on Saturday. In addition to his new role, To Lam will also be paying state visits to Laos and Cambodia from July 11-13 before visiting China from August 18-20. 

Assistant: The day that Vietnam's Communist Party General Secretary and State President To Lam and his spouse will pay a statevisit tostate visit to their statevisit tosylated visit to China statevisit to China statevstatevstate visit tostat a statevate visit to China statevisit to China statevisit to China state visit to visit to China statevisit to China statevstate visit to China statevstate visit to pay statevisit toorseat a statevisitsting statevisit tosylated visit to China statevstate visit to China statevisit to their state visit toscertate State visit tostat