In [None]:
pip install --upgrade pymupdf

In [1]:
import fitz


pdf1 = "/Users/kittipot/Desktop/AI Engineer Test/ai_test_user_feedback.pdf"
pdf2 = "/Users/kittipot/Desktop/AI Engineer Test/ai_test_bug_report.pdf"



from langchain.docstore.document import Document

texts = []

for file_path in [pdf1, pdf2]:
    doc = fitz.open(file_path)
    for i, page in enumerate(doc):
        content = page.get_text()
        if content.strip():
            texts.append(Document(
                page_content=content,
                metadata={"source": file_path, "page": i + 1}
            ))

for i, doc in enumerate(texts[:3]):  # ดูแค่ 3 อันแรกพอ
    print(f"🔹 Document {i+1}")
    print(f"Source: {doc.metadata['source']} - Page: {doc.metadata['page']}")
    print(doc.page_content[:300])  # แสดงเนื้อหาต้น 300 ตัวอักษร
    print("------")

🔹 Document 1
Source: /Users/kittipot/Desktop/AI Engineer Test/ai_test_user_feedback.pdf - Page: 1
Okay, here are 50 distinct customer feedback snippets, following the format you requested: 
Feedback #1: I tried uploading a large PDF, and it just got stuck at the very end. It says 99% for 
ages! 
Feedback #2: The search is useless when I look for things like "CEO". It just finds documents 
with t
------
🔹 Document 2
Source: /Users/kittipot/Desktop/AI Engineer Test/ai_test_user_feedback.pdf - Page: 2
Feedback #17: Sometimes, random scrollbars appear on the side of the page even when 
there's nothing to scroll. 
Feedback #18: I tried to delete a document, and it just disappeared without even asking if I was 
sure. 
Feedback #19: When I hover over some of the icons, the little help text doesn't sh
------
🔹 Document 3
Source: /Users/kittipot/Desktop/AI Engineer Test/ai_test_user_feedback.pdf - Page: 3
Feedback #37: Sometimes when I tap on things, it doesn't respond right away. 
Feedback #38

In [3]:
for doc in texts[:3]:
    print(doc.metadata)

{'source': '/Users/kittipot/Desktop/AI Engineer Test/ai_test_user_feedback.pdf', 'page': 1}
{'source': '/Users/kittipot/Desktop/AI Engineer Test/ai_test_user_feedback.pdf', 'page': 2}
{'source': '/Users/kittipot/Desktop/AI Engineer Test/ai_test_user_feedback.pdf', 'page': 3}


In [5]:
import os
from dotenv import load_dotenv

# load environment
load_dotenv(dotenv_path="/Users/kittipot/Desktop/API KEY/HUGGINGFACE_API_KEY.env")

# ตั้งค่า API Key ของ Hugging Face
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY") # ใส่คีย์ API ของคุณที่นี่

if "HUGGINGFACE_API_KEY" in os.environ:
    print("Environment variable loaded successfully!")
    print("HUGGINGFACE_API_KEY:", os.environ["HUGGINGFACE_API_KEY"])  # ตรวจสอบค่า (อย่าแชร์ค่า API Key จริง)
else:
    print("Failed to load environment variable.")


Environment variable loaded successfully!
HUGGINGFACE_API_KEY: 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


In [7]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

#HuggingFaceEmbeddings
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

# storing embeddings in the vector store
vectorstore = FAISS.from_documents(texts, embeddings)

# Retrieve and generate using the relevant snippets of the blog.
retriever = vectorstore.as_retriever()

In [9]:
# เรียกข้อมูลจาก retriever ด้วย query
docs = retriever.invoke("email notification")
# รวม page_content ทั้งหมดมาเป็น string เดียวก่อนส่งเข้า prompt
context_str = "\n\n".join([doc.page_content for doc in docs])

In [11]:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B")

Device set to use mps:0


In [13]:
# Meta Llama 3 Instruct uses a prompt template, with special tags used to indicate the user query and system prompt.
# You can find the documentation on this [model card](https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/#meta-llama-3-instruct).

from langchain_core.runnables import Runnable
class Llama3PromptRunnable(Runnable):
    def __init__(self, system=""):
        super().__init__()
        self.system = system

    def invoke(self, inputs: dict, config=None) -> str:
        question = inputs["question"]
        context = inputs["context"]
        # Create the system prompt if provided
        system_prompt = ""
        if self.system != "":
            system_prompt = (
                f"<|start_header_id|>system<|end_header_id|>\n\n{self.system}\n\n"
                f"context: {context}\n\n"
                f"<|eot_id|>\n\n"
            )
            prompt = (
                f"<|begin_of_text|>{system_prompt}"
                f"<|start_header_id|>user<|end_header_id|>\n\n"
                f"{question}\n\n"
                f"<|eot_id|>\n\n"
                f"<|start_header_id|>assistant<|end_header_id|>\n\n" # header - assistant
            )

        # Return the formatted prompt
        return prompt

In [16]:
# Example usage
llama_prompt = Llama3PromptRunnable(
    system="""
You are a Q&A assistant.
Only use the context provided below to answer the user's question.
If the answer is not in the context, reply exactly: "I don't know."
Do not generate or guess. Do not make up any information.
Do not use prior knowledge. Follow this strictly.

<context>
{context}
</context>

Answer the following question based on the above context.
""",
)

from langchain_core.runnables import RunnablePassthrough

formatted_prompt = llama_prompt.invoke({
    "context": context_str,
    "question": "What are the issues reported on email notification?"
})

# ลองพิมพ์ดู prompt ที่ format แล้ว
print(formatted_prompt)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>


You are a Q&A assistant.
Only use the context provided below to answer the user's question.
If the answer is not in the context, reply exactly: "I don't know."
Do not generate or guess. Do not make up any information.
Do not use prior knowledge. Follow this strictly.

<context>
{context}
</context>

Answer the following question based on the above context.


context: Title: Date Filter Not Working Correctly 
Description: Filtering documents by date range does not consistently 
return the expected results. Some documents within the specified range 
are missing, while others outside the range are included. 
Steps to Reproduce: 
1. Navigate to the document library. 
2. Apply a date filter (e.g., "Last Month"). 
3. Review the filtered documents. 
Environment: All platforms, Filtering Module v1.5 
Severity: Medium 
Proposed Fix: Review and correct the date comparison logic in the filtering 
mechanism. 
 
 
Bug #7 
Title: Email No

In [None]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough()
    }
    | llama_prompt
    | pipe
    | RunnableLambda(lambda x: x[0]["generated_text"])
    | StrOutputParser()

In [None]:
answer = rag_chain.invoke("What are the issues reported on email notification?")