In [None]:
!pip install langchain langchain_community langchain-ai21 gpt4all langchain-huggingface faiss-gpu
!pip install transformers torch pypdf sentence-transformers openai bitsandbytes  accelerate  xformers  einops 

# Import Libraries

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import GPT4AllEmbeddings
import os

# Load PDF Data

In [None]:
# Directory that holds the source PDF files (a path under /kaggle/input).
pdf_data_path = '/kaggle/input/lichsu'

In [None]:
# Pick up every file matching *.pdf in `pdf_data_path` and parse it with
# PyPDFLoader; `documents` holds whatever the loader returns.
loader = DirectoryLoader(
    pdf_data_path,
    glob="*.pdf",
    loader_cls=PyPDFLoader,
)
documents = loader.load()

In [None]:
# Sanity check: show the text of the second loaded document (index 1).
documents[1].page_content

# Text Splitter

Use the AI21 API key for the semantic text splitter.

In [None]:
from getpass import getpass
from kaggle_secrets import UserSecretsClient

# Read the AI21 key from the Kaggle secret store (secret label "ai21") so it
# never appears in the notebook source.
user_secrets = UserSecretsClient()
AI21_API_KEY = user_secrets.get_secret("ai21")

# Publish the key through the process environment under AI21_API_KEY.
os.environ.update(AI21_API_KEY=AI21_API_KEY)

In [None]:
from langchain_ai21 import AI21SemanticTextSplitter

# Split the loaded documents into chunks with the AI21 semantic splitter,
# configured with chunk_size=1024 and chunk_overlap=128.
text_splitter = AI21SemanticTextSplitter(
    chunk_size=1024,
    chunk_overlap=128,
)
chunks = text_splitter.split_documents(documents)

In [None]:
# Print the text of the second chunk (index 1) to eyeball the split quality.
print(chunks[1].page_content)

In [None]:
# Print the text of the third chunk (index 2) for comparison with the previous one.
print(chunks[2].page_content)

# Embedding and VectorStore

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
import torch

# Hugging Face model used to embed the chunks and the queries.
model_name = 'bkai-foundation-models/vietnamese-bi-encoder'
# model_name = 'keepitreal/vietnamese-sbert'

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell does not crash on a CPU-only session.
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
# Embedding normalisation is switched OFF here: the vectors are used as the
# model produces them. Set this to True if normalised vectors are wanted.
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

In [None]:
# Build a FAISS vector store from the chunks, embedding them with `embeddings`.
db = FAISS.from_documents(chunks, embeddings)
# Uncomment to persist the index to ./db:
# db.save_local('./db')

In [None]:
# db = FAISS.load_local('/kaggle/working/db',embeddings, allow_dangerous_deserialization=True)

In [None]:
# Sample query (Vietnamese for "General Uprising of the August Revolution, 1945").
query = "Tổng khởi nghĩa Cách mạng Tháng Tám 1945"
# Retrieve the closest chunks together with their scores.
# NOTE(review): how the score should be read (distance vs. similarity) depends
# on the FAISS index configuration — confirm before thresholding on it.
res = db.similarity_search_with_score(query)

In [None]:
# Display the result of the similarity search above.
res

In [None]:
# Wrap the vector store as a retriever; k=3 is passed as a search argument
# (presumably the number of chunks returned per query — verify).
retriever = db.as_retriever(search_kwargs={"k": 3})

# Model HuggingFace Hub + Chain

In [None]:
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, pipeline, BitsAndBytesConfig
import torch

## Load the Hugging Face access token

In [None]:
# Hugging Face token read from the Kaggle secret store (secret label
# "huggingface-write-token") through the `user_secrets` client.
HUGGINGFACE_WRITE_TOKEN = user_secrets.get_secret("huggingface-write-token")

## Remote LLM via HuggingFaceHub

In [None]:
from langchain_community.llms import HuggingFaceHub

# LLM accessed through the Hugging Face Hub, authenticated with
# HUGGINGFACE_WRITE_TOKEN.
# NOTE(review): HuggingFaceHub appears to be deprecated in recent
# langchain_community releases — confirm and plan a migration if so.
llm = HuggingFaceHub(
#     repo_id="vilm/vinallama-7b-chat", # 13 GB exceeds the 10 GB limit -> cannot be used through HuggingFaceHub
    repo_id="vilm/vinallama-2.7b-chat",
    huggingfacehub_api_token = HUGGINGFACE_WRITE_TOKEN
)

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# """<|im_start|>system
# Sử dụng thông tin sau đây để trả lời câu hỏi. Nếu bạn không biết câu trả lời, hãy nói không biết, đừng cố tạo ra câu trả lời \n
# {context}<|im_end|>\n
# <|im_start|>user\n
# {question}!<|im_end|>\n
# <|im_start|>assistant
# """

# System instruction for the QA chain. In English: "Use the following
# information to answer the question. If you do not know the answer, say so;
# do not try to make one up. The answer should be coherent and meaningful."
# The pieces are joined by implicit string concatenation, so every sentence
# must carry its own trailing space or the sentences run together.
system_prompt = (
    "Sử dụng thông tin sau đây để trả lời câu hỏi. "
    "Nếu bạn không biết câu trả lời, hãy nói không biết, đừng cố tạo ra câu trả lời. "
    "Câu trả lời mạch lạc và có ý nghĩa. "
    "Context: {context}"
)

# Two-message chat prompt: the system instruction (which carries the
# {context} placeholder) followed by the user's question as {input}.
chat_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(chat_messages)

# Combine the documents chain with the retriever into a single retrieval chain.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
# Run the retrieval chain on the sample query; the question is passed under
# the "input" key, matching the {input} placeholder of the chat prompt.
res = chain.invoke({"input": query})

In [None]:
# Display the full result returned by the chain.
res

In [None]:
# Print the question followed by the generated answer.
print(res['input'])
print(res['answer'])

# Load model (HuggingFacePipeline) + Chain

In [None]:
# Select the GPU when CUDA is available, otherwise the CPU, and display the choice.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

In [None]:
# Hugging Face repository id of the model to load locally.
model_path = "vilm/vinallama-7b"
# Alternatives tried previously (the PhoGPT one reads its own token from the secret store):
# model_path="vilm/vinallama-2.7b-chat"
# model_path = "vinai/PhoGPT-7B5-Instruct"
# token = user_secrets.get_secret("Token_PhoGPT_7B5")

## Load Tokenizer

In [None]:
# Load the tokenizer that belongs to `model_path`.
# NOTE(review): trust_remote_code=True allows code shipped in the model
# repository to run locally — keep it only for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
#     token=token
)

## Load Model (4-bit quantization)

In [None]:
# Load the model configuration so a few attributes can be overridden before
# the weights are instantiated.
config = AutoConfig.from_pretrained(
    model_path,
    trust_remote_code=True,
#     token=token
)
# NOTE(review): `init_device` looks like a model-specific config attribute —
# confirm that the selected model actually reads it.
config.init_device = device
# NOTE(review): presumably intended to make generation near-deterministic;
# verify that the generation call picks this value up.
config.temperature = 0.01

# bitsandbytes settings: 4-bit NF4 weights with double quantisation, and
# bfloat16 as the compute dtype.
# NOTE(review): confirm the target GPU supports bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Instantiate the causal LM with the quantisation settings and the modified config.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    config=config,
    trust_remote_code=True,
#     token=token
)

# Put the model in evaluation mode (inference only).
model.eval()

In [None]:
import transformers
# Import from langchain_community (as the import cell of this notebook does)
# instead of the deprecated `langchain.llms` compatibility shim.
from langchain_community.llms import HuggingFacePipeline

# Text-generation pipeline around the locally loaded model and tokenizer.
text_generation_pipeline = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    eos_token_id=tokenizer.eos_token_id,
    # The EOS token id is reused as the padding token id.
    pad_token_id=tokenizer.eos_token_id,
    repetition_penalty=1.1,
    # The returned text includes the prompt, not only the continuation.
    return_full_text=True,
    # Cap on the number of newly generated tokens per call.
    max_new_tokens=100,
)
# Expose the pipeline as a LangChain LLM so it can be used inside chains.
my_pipeline = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [None]:
# Instruction-style prompt: "### Câu hỏi" = "Question", "### Trả lời" = "Answer".
PROMPT = "### Câu hỏi:\n{instruction}\n\n### Trả lời:"

# Smoke-test question ("How many colours does an orange have?").
input_prompt = PROMPT.format_map(
    {"instruction": "Một quả cam có bao nhiêu màu?"}
)
# Use `.invoke`, as the retrieval chain earlier in the notebook does; calling
# the LLM object directly is the deprecated LangChain calling convention.
my_pipeline.invoke(input_prompt)

In [None]:
# Import PromptTemplate from langchain_core.prompts; importing it from the
# `langchain` root module is deprecated.
from langchain_core.prompts import PromptTemplate

# Plain question -> answer template ("Câu hỏi" = question, "Trả lời" = answer).
# NOTE(review): this rebinds `prompt`, which earlier in the notebook refers to
# the ChatPromptTemplate of the retrieval chain; a distinct name would avoid
# surprises when cells are re-run out of order.
template = "### Câu hỏi:\n{question}\n\n### Trả lời:"
prompt = PromptTemplate(template=template, input_variables=["question"])

In [None]:
# Import PromptTemplate from langchain_core.prompts; importing it from the
# `langchain` root module is deprecated.
from langchain_core.prompts import PromptTemplate

# ChatML-style QA template. The system turn says "Answer the question based
# on the following context" and carries {context}; the user turn carries
# {question}.
# NOTE(review): `prompt` is also bound to this raw string, as in the original
# cell; that overwrites any earlier `prompt` object — confirm it is intended.
template = prompt = """<|im_start|>system
Dựa vào ngữ cảnh sau để trả lời câu hỏi\n{context}\n
<|im_end|>
<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant"""

# The template has two placeholders, so both must be declared as inputs.
prompt_qa = PromptTemplate(template=template, input_variables=["context", "question"])