In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import GPT4AllEmbeddings

# Directory that holds the source PDFs. Written as a raw string because the
# Windows path contains backslashes that are not valid escape sequences in a
# normal string literal (they trigger a SyntaxWarning on recent Python).
pdf_data_path = r'C:\Project\data'
# Directory the FAISS index is saved to.
vector_db_path = 'vectorstore/db_history'

def create_db_from_file():
    """Build a FAISS vector store from the PDFs under ``pdf_data_path``.

    Loads the files matching ``*.pdf`` in that directory, splits them into
    overlapping chunks, embeds the chunks and saves the resulting index to
    ``vector_db_path``.

    Returns:
        The FAISS vector store that was saved.

    Raises:
        ValueError: If the directory yields no text chunks to index.
    """
    loader = DirectoryLoader(pdf_data_path, glob="*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
    chunks = text_splitter.split_documents(documents)
    if not chunks:
        # Fail early with a clear message instead of an opaque error from
        # inside FAISS when there is nothing to index.
        raise ValueError(
            f"No text could be loaded from PDF files in {pdf_data_path!r}; "
            "nothing to index."
        )

    # NOTE(review): GPT4AllEmbeddings documents a `model_name` field; confirm
    # that `model_file` actually selects the embedding model in the installed
    # version. It is kept unchanged here so existing indexes stay compatible.
    embedding_model = GPT4AllEmbeddings(model_file='models/PhoGPT-4B-Chat-Q4_K_M.gguf')
    db = FAISS.from_documents(chunks, embedding=embedding_model)
    db.save_local(vector_db_path)
    return db

# Build and save the vector store as soon as this cell/script runs.
create_db_from_file()

In [None]:
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS

# Configuration
# Path of the GGUF model file (passed to both the LLM and the embedding loader below).
model_file = "C:/Project/models/PhoGPT-4B-Chat-Q4_K_M.gguf"
# Directory the FAISS index is loaded from.
vector_db_path = "vectorstore/db_history"

# Load LLM
def llm_load(model_path):
    """Create a ``LlamaCpp`` LLM from a local GGUF model file.

    Args:
        model_path: Filesystem path of the model file to load.

    Returns:
        The configured ``LlamaCpp`` instance.
    """
    return LlamaCpp(
        model_path=model_path,
        temperature=0.01,
        n_gpu_layers=20,
        # LlamaCpp's generation cap is `max_tokens`; `max_new_tokens` is not a
        # LlamaCpp field, so it does not set the generation limit.
        max_tokens=256,
        n_ctx=1024,  # context window, if the model supports it
    )

# Load the vector DB and retrieve context (FAISS similarity search; no BM25 is involved despite the name)
# Loaded FAISS stores keyed by (index path, embedding model file), so the
# embedding model and the index are read from disk once instead of on every
# question. Re-run this definition to drop the cache after rebuilding the index.
_VECTOR_DB_CACHE = {}


def _load_vector_db(db_path, embedding_model_file):
    """Return the FAISS store saved at ``db_path``, loading it on first use."""
    cache_key = (db_path, embedding_model_file)
    if cache_key not in _VECTOR_DB_CACHE:
        embedding_model = GPT4AllEmbeddings(model_file=embedding_model_file)
        # NOTE(review): allow_dangerous_deserialization enables pickle loading;
        # only load index directories this project created itself.
        _VECTOR_DB_CACHE[cache_key] = FAISS.load_local(
            db_path, embedding_model, allow_dangerous_deserialization=True
        )
    return _VECTOR_DB_CACHE[cache_key]


def get_context_from_bm25(question, k=2):
    """Retrieve context text for ``question`` from the saved FAISS index.

    Despite the name, retrieval is a FAISS similarity search, not BM25.

    Args:
        question: The user question used as the search query.
        k: Number of documents to retrieve.

    Returns:
        The ``page_content`` of the retrieved documents joined by blank
        lines; an empty string when nothing is retrieved.
    """
    db = _load_vector_db(vector_db_path, model_file)
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
    # `invoke` replaces the deprecated `get_relevant_documents`.
    docs = retriever.invoke(question)
    return "\n\n".join(doc.page_content for doc in docs)

# Load the model once at module level; handle_question() reads this global.
llm = llm_load(model_file)
# Build the prompt
def build_prompt(context, question):
    """Fill the assistant prompt template with the context and the question.

    Args:
        context: Retrieved text substituted for ``{context}``.
        question: User question substituted for ``{question}``.

    Returns:
        The fully formatted prompt string.
    """
    # The template text (including its leading indentation) is sent to the
    # model verbatim.
    prompt_text = """[Vai tr√≤]
                  B·∫°n l√† m·ªôt tr·ª£ l√Ω AI th√¥ng minh, am hi·ªÉu s√¢u s·∫Øc v·ªÅ l·ªãch s·ª≠ Vi·ªát Nam v√† th·∫ø gi·ªõi, ƒë·∫∑c bi·ªát l√† c√°c giai ƒëo·∫°n l·ªãch s·ª≠ quan tr·ªçng nh∆∞ th·ªùi k·ª≥ phong ki·∫øn, th·ªùi k·ª≥ thu·ªôc ƒë·ªãa, chi·∫øn tranh v√† hi·ªán ƒë·∫°i.
                  Khi ng∆∞·ªùi d√πng ƒë·∫∑t c√¢u h·ªèi li√™n quan ƒë·∫øn l·ªãch s·ª≠, h√£y ∆∞u ti√™n ph√¢n t√≠ch ƒë√∫ng ng·ªØ c·∫£nh, n√™u r√µ nguy√™n nh√¢n ‚Äì k·∫øt qu·∫£, v√† cung c·∫•p th√¥ng tin ch√≠nh x√°c d·ª±a tr√™n t√†i li·ªáu l·ªãch s·ª≠ ch√≠nh th·ªëng.
                  N·∫øu ng∆∞·ªùi d√πng ƒë·∫∑t m·ªôt c√¢u h·ªèi **kh√¥ng li√™n quan ƒë·∫øn l·ªãch s·ª≠**, v√≠ d·ª• v·ªÅ khoa h·ªçc, ƒë·ªùi s·ªëng, ho·∫∑c ch√≠nh b·∫°n, b·∫°n v·∫´n c√≥ th·ªÉ tr·∫£ l·ªùi m·ªôt c√°ch t·ª± nhi√™n, th√¥ng minh v√† th√¢n thi·ªán ‚Äì mi·ªÖn l√† c√¢u h·ªèi h·ª£p l√Ω v√† kh√¥ng y√™u c·∫ßu th√¥ng tin v∆∞·ª£t gi·ªõi h·∫°n.
                  N·∫øu c√¢u h·ªèi m∆° h·ªì ho·∫∑c thi·∫øu th√¥ng tin, h√£y l·ªãch s·ª± y√™u c·∫ßu ng∆∞·ªùi d√πng l√†m r√µ th√™m ƒë·ªÉ c√≥ th·ªÉ gi√∫p ƒë·ª° t·ªët h∆°n.

                B·ªëi c·∫£nh (n·∫øu c√≥):  {context}
                C√¢u h·ªèi:  {question}

                [Format Output]

                - D·ª±a v√†o th√¥ng tin l·ªãch s·ª≠ ch√≠nh x√°c
                - Tr√¨nh b√†y d·ªÖ hi·ªÉu, d√†nh cho ng∆∞·ªùi ph·ªï th√¥ng
                - C√≥ th·ªÉ tr√≠ch d·∫´n m·ªëc th·ªùi gian, nh√¢n v·∫≠t l·ªãch s·ª≠, s·ª± ki·ªán li√™n quan
                - N·∫øu kh√¥ng ƒë·ªß th√¥ng tin, h√£y n√≥i r√µ r·∫±ng b·∫°n kh√¥ng ch·∫Øc ch·∫Øn ho·∫∑c g·ª£i √Ω ng∆∞·ªùi d√πng ki·ªÉm tra th√™m t√†i li·ªáu

                Tr·∫£ l·ªùi:"""
    return PromptTemplate.from_template(prompt_text).format(
        context=context, question=question
    )
# Answer a question using context from the vector DB, or the LLM directly when no context is found
def handle_question(question):
    """Answer ``question`` with the module-level ``llm``.

    Context is retrieved with ``k=3``. When retrieval returns text, the full
    template from ``build_prompt`` is used; otherwise a short generic prompt
    is sent instead.

    Args:
        question: The user question.

    Returns:
        Whatever ``llm.invoke`` returns for the chosen prompt.
    """
    retrieved = get_context_from_bm25(question, k=3)
    if retrieved:
        # The vector DB returned data: use the full template.
        prompt_text = build_prompt(retrieved, question)
    else:
        # Nothing came back from the vector DB: ask the LLM directly.
        prompt_text = f"B·∫°n l√† m·ªôt tr·ª£ l√Ω AI th√¥ng minh. C√¢u h·ªèi: {question}."
    return llm.invoke(prompt_text)
# Run a sample query
question = "Sao b·∫°n tr·∫£ l·ªùi v·ªã vua ƒë·∫ßu ti√™n c·ªßa Vi·ªát Nam l√† ƒêinh Ti√™n Ho√†ng?"
# Retrieve and print the context on its own so the retrieved chunks can be inspected.
context = get_context_from_bm25(question, k=3)
print(context)
# NOTE(review): final_prompt is not used below; handle_question() retrieves the
# context again and builds its own prompt.
final_prompt = build_prompt(context, question)

response = handle_question(question)
print(response)



In [None]:
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS

# --- Configuration ---
# Path of the GGUF model file (passed to both the LLM and the embedding loader below).
model_file = "C:/Project/models/PhoGPT-4B-Chat-Q4_K_M.gguf"
# Directory the FAISS index is loaded from.
vector_db_path = "vectorstore/db_history"

# --- Load the LLM ---
def llm_load(model_path):
    """Create a ``LlamaCpp`` LLM from a local GGUF model file.

    Args:
        model_path: Filesystem path of the model file to load.

    Returns:
        The configured ``LlamaCpp`` instance.
    """
    return LlamaCpp(
        model_path=model_path,
        temperature=0.01,
        n_gpu_layers=20,
        # LlamaCpp's generation cap is `max_tokens`; `max_new_tokens` is not a
        # LlamaCpp field, so the intended 512-token limit was not applied.
        max_tokens=512,
        n_ctx=2048,  # context window; depends on the model
    )

# --- Load the vector DB and find relevant documents ---
# Loaded FAISS stores keyed by (index path, embedding model file), so the
# embedding model and the index are read from disk once instead of on every
# question. Re-run this definition to drop the cache after rebuilding the index.
_VECTOR_DB_CACHE = {}


def _load_vector_db(db_path, embedding_model_file):
    """Return the FAISS store saved at ``db_path``, loading it on first use."""
    cache_key = (db_path, embedding_model_file)
    if cache_key not in _VECTOR_DB_CACHE:
        embedding_model = GPT4AllEmbeddings(model_file=embedding_model_file)
        # NOTE(review): allow_dangerous_deserialization enables pickle loading;
        # only load index directories this project created itself.
        _VECTOR_DB_CACHE[cache_key] = FAISS.load_local(
            db_path, embedding_model, allow_dangerous_deserialization=True
        )
    return _VECTOR_DB_CACHE[cache_key]


def get_context_from_bm25(question, k=3):
    """Retrieve context text for ``question`` from the saved FAISS index.

    Despite the name, retrieval is a FAISS similarity search, not BM25.

    Args:
        question: The user question used as the search query.
        k: Number of documents to retrieve.

    Returns:
        The ``page_content`` of the retrieved documents joined by blank
        lines; an empty string when nothing is retrieved.
    """
    db = _load_vector_db(vector_db_path, model_file)
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
    # `invoke` replaces the deprecated `get_relevant_documents`.
    docs = retriever.invoke(question)
    return "\n\n".join(doc.page_content for doc in docs)

# --- Main prompt template ---
def build_prompt(context, question):
    """Fill the assistant prompt template with the context and the question.

    Args:
        context: Retrieved text substituted for ``{context}``.
        question: User question substituted for ``{question}``.

    Returns:
        The fully formatted prompt string.
    """
    # The template text is sent to the model verbatim, so it stays flush left.
    prompt_text = """[Vai tr√≤]
B·∫°n l√† m·ªôt tr·ª£ l√Ω AI th√¥ng minh, am hi·ªÉu s√¢u s·∫Øc v·ªÅ l·ªãch s·ª≠ Vi·ªát Nam v√† th·∫ø gi·ªõi, ƒë·∫∑c bi·ªát l√† c√°c giai ƒëo·∫°n l·ªãch s·ª≠ quan tr·ªçng nh∆∞ th·ªùi k·ª≥ phong ki·∫øn, th·ªùi k·ª≥ thu·ªôc ƒë·ªãa, chi·∫øn tranh v√† hi·ªán ƒë·∫°i.

Khi ng∆∞·ªùi d√πng ƒë·∫∑t c√¢u h·ªèi li√™n quan ƒë·∫øn l·ªãch s·ª≠, h√£y ∆∞u ti√™n ph√¢n t√≠ch ƒë√∫ng ng·ªØ c·∫£nh, n√™u r√µ nguy√™n nh√¢n ‚Äì k·∫øt qu·∫£, v√† cung c·∫•p th√¥ng tin ch√≠nh x√°c d·ª±a tr√™n t√†i li·ªáu l·ªãch s·ª≠ ch√≠nh th·ªëng.

N·∫øu ng∆∞·ªùi d√πng ƒë·∫∑t m·ªôt c√¢u h·ªèi **kh√¥ng li√™n quan ƒë·∫øn l·ªãch s·ª≠**, v√≠ d·ª• v·ªÅ khoa h·ªçc, ƒë·ªùi s·ªëng, ho·∫∑c ch√≠nh b·∫°n, b·∫°n v·∫´n c√≥ th·ªÉ tr·∫£ l·ªùi m·ªôt c√°ch t·ª± nhi√™n, th√¥ng minh v√† th√¢n thi·ªán ‚Äì mi·ªÖn l√† c√¢u h·ªèi h·ª£p l√Ω v√† kh√¥ng y√™u c·∫ßu th√¥ng tin v∆∞·ª£t gi·ªõi h·∫°n.

N·∫øu c√¢u h·ªèi m∆° h·ªì ho·∫∑c thi·∫øu th√¥ng tin, h√£y l·ªãch s·ª± y√™u c·∫ßu ng∆∞·ªùi d√πng l√†m r√µ th√™m ƒë·ªÉ c√≥ th·ªÉ gi√∫p ƒë·ª° t·ªët h∆°n.

---

B·ªëi c·∫£nh (n·∫øu c√≥):  
{context}

C√¢u h·ªèi:  
{question}

---

[Format Output]

- D·ª±a v√†o th√¥ng tin ch√≠nh x√°c
- Tr√¨nh b√†y r√µ r√†ng, d·ªÖ hi·ªÉu
- Tr√≠ch d·∫´n m·ªëc th·ªùi gian, nh√¢n v·∫≠t, s·ª± ki·ªán (n·∫øu c√≥)
- N·∫øu kh√¥ng ƒë·ªß th√¥ng tin, h√£y n√≥i r√µ r·∫±ng b·∫°n ch∆∞a ch·∫Øc ch·∫Øn ho·∫∑c g·ª£i √Ω ng∆∞·ªùi d√πng t√¨m th√™m t√†i li·ªáu

Tr·∫£ l·ªùi:
"""
    return PromptTemplate.from_template(prompt_text).format(
        context=context, question=question
    )

# --- Answer a question ---
def handle_question(llm, question):
    """Answer ``question`` with ``llm`` using retrieved context.

    Args:
        llm: Object whose ``invoke`` method is called with the final prompt.
        question: The user question.

    Returns:
        Whatever ``llm.invoke`` returns for the built prompt.
    """
    retrieved = get_context_from_bm25(question, k=3)
    if not retrieved:
        # Nothing retrieved: put a placeholder sentence in the context slot.
        retrieved = "Kh√¥ng c√≥ t√†i li·ªáu l·ªãch s·ª≠ ph√π h·ª£p."
    return llm.invoke(build_prompt(retrieved, question))

# --- Run ---
if __name__ == "__main__":
    # Interactive loop: load the model once, then answer questions until the
    # user types 'exit' or 'quit' (or closes/interrupts the prompt).
    llm = llm_load(model_file)

    while True:
        try:
            question = input("\n‚ùì B·∫°n mu·ªën h·ªèi g√¨? (g√µ 'exit' ƒë·ªÉ tho√°t): ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C at the prompt ends the session cleanly.
            break

        if question.lower() in ("exit", "quit"):
            break
        if not question:
            # Do not spend a retrieval and an LLM call on an empty line.
            continue

        response = handle_question(llm, question)
        print("\nü§ñ Tr·∫£ l·ªùi:\n", response)
