In [None]:
import fitz  # PyMuPDF
import json

pdf_path = "AFTERBURNER CONTROL_Chapter 3.pdf"
doc = fitz.open(pdf_path)

page_data = []

for i in range(len(doc)):
    text = doc[i].get_text()
    page_data.append({
        "page": i + 1,
        "text": text
    })

# save as JSON
with open("extracted_text.json", "w", encoding="utf-8") as f:
    json.dump(page_data, f, ensure_ascii=False, indent=2)

print(f"‚úÖ Extracted {len(page_data)} pages.")


In [None]:
import json
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load JSON dataset
with open("extracted_text.json", "r", encoding="utf-8") as f:
    pages = json.load(f)

# define chunks 
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 50
)

docs = []
for p in pages:
    chunks = text_splitter.split_text(p["text"])
    for chunk in chunks:
        doc = Document(
            page_content=chunk,
            metadata={"page": p["page"]}
        )
        docs.append(doc)

print(f"‚úÖ Total chunks: {len(docs)}")


In [8]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# multilingual model for Thai
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# FAISS index: Facebook AI Similarity Search
db = FAISS.from_documents(docs, embedding_model)

db.save_local("faiss_afterburner_index")

print("‚úÖ FAISS vectorstore saved.")


‚úÖ FAISS vectorstore saved.


In [9]:
# # Load index
# db = FAISS.load_local(
#     "faiss_afterburner_index",
#     embedding_model,
#     allow_dangerous_deserialization=True
# )

# query = "‡∏Å‡∏≤‡∏£‡∏ñ‡∏≠‡∏î metering valve ‡∏ó‡∏≥‡∏≠‡∏¢‡πà‡∏≤‡∏á‡πÑ‡∏£"
# results = db.similarity_search(query, k=2)

# for i, r in enumerate(results, 1):
#     print(f"üîé Chunk {i} (‡∏´‡∏ô‡πâ‡∏≤ {r.metadata['page']}):") # which page of chunk
#     print(r.page_content[:500])
#     print("-" * 80)


In [10]:
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA

llm = Ollama(model="mistral")

# Load index
db = FAISS.load_local(
    "faiss_afterburner_index",
    embedding_model,
    allow_dangerous_deserialization=True
)

# Retrieval chain (good for untouch data)
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever())

question = "‡∏Ç‡∏±‡πâ‡∏ô‡∏ï‡∏≠‡∏ô‡∏ñ‡∏≠‡∏î pilot burner regulator ‡∏ï‡πâ‡∏≠‡∏á‡∏ó‡∏≥‡∏≠‡∏¢‡πà‡∏≤‡∏á‡πÑ‡∏£"

response = qa_chain.run(
    f"""‡∏Ñ‡∏∏‡∏ì‡∏Ñ‡∏∑‡∏≠‡∏ú‡∏π‡πâ‡∏ä‡πà‡∏ß‡∏¢‡∏î‡πâ‡∏≤‡∏ô‡πÄ‡∏ó‡∏Ñ‡∏ô‡∏¥‡∏Ñ‡∏†‡∏≤‡∏©‡∏≤‡πÑ‡∏ó‡∏¢ ‡∏ä‡πà‡∏ß‡∏¢‡∏ï‡∏≠‡∏ö‡∏Ñ‡∏≥‡∏ñ‡∏≤‡∏°‡∏ï‡πà‡∏≠‡πÑ‡∏õ‡∏ô‡∏µ‡πâ‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏Å‡∏£‡∏∞‡∏ä‡∏±‡∏ö:
\"{question}\""""
)

print(response)

‡∏Ñ‡∏∏‡∏ì‡∏ï‡πâ‡∏≠‡∏á‡∏ñ‡∏π‡∏Å‡∏•‡∏ö‡πÅ‡∏•‡∏∞‡πÅ‡∏™‡∏î‡∏á‡∏ö·ªô‡∏™‡πà‡∏ß‡∏ô Pilot Burner Pressure Regulator Assembly ‡πÉ‡∏´‡πâ‡πÄ‡∏õ‡πá‡∏ô‡∏à‡∏±‡∏á‡∏´‡∏ß‡∏∞‡πÑ‡∏î‡πâ‡πÇ‡∏î‡∏¢‡∏ó‡∏≥‡∏ï‡∏≤‡∏°‡∏Ç‡∏±‡πâ‡∏ô‡∏ï‡∏≠‡∏ô suivants:

1. ‡πÉ‡∏´‡πâ‡πÉ‡∏ä‡πâ‡∏î‡∏µ‡πÄ‡∏£‡∏Ñ‡∏ã‡∏¥‡∏•‡∏¥‡∏Å‡πã‡∏≠ Solvent P-D-680, Type II ‡∏´‡∏£‡∏∑‡∏≠ III ‡∏°‡∏≤‡∏•‡∏á‡∏ö‡∏£‡∏¥‡∏ß‡∏≤‡∏à‡∏£‡∏≤‡∏Å‡∏≤‡∏£‡∏ó‡∏≥‡∏™‡∏∞‡∏≠‡∏≤‡∏î‡∏ó‡∏±‡πà‡∏ß‡∏ô‡∏¥‡πâ‡∏ß‡πÅ‡∏™‡∏î‡∏á‡∏ú‡∏•‡∏Å‡∏•‡∏∏‡πà‡∏°‡πÅ‡∏™‡∏î‡∏á
2. ‡∏ñ‡∏π‡∏Å‡∏•‡∏ö‡πÅ‡∏•‡∏∞‡πÄ‡∏õ‡∏µ‡πã‡∏¢‡∏ö‡∏≤‡∏ô‡∏ú‡∏π‡∏Å (lockwiring) ‡∏Ç‡∏≠‡∏á‡∏ö‡∏£‡∏¥‡∏ß‡∏≤‡∏à‡∏£‡∏≤‡∏Å‡∏≤‡∏£‡πÇ‡∏î‡∏¢‡∏Ñ‡∏≥‡∏ô‡∏∂‡∏á‡πÅ‡∏•‡∏∞‡πÑ‡∏°‡πà‡πÉ‡∏ä‡πâ‡∏ó‡∏≥‡πÉ‡∏´‡πâ‡∏û‡∏±‡∏Å‡πÜ ‡∏ã‡∏∂‡πà‡∏á‡∏à‡∏∞‡πÄ‡∏õ‡πá‡∏ô‡∏Å‡∏≤‡∏£‡∏ï‡∏±‡∏î
3. ‡∏ñ‡∏π‡∏Å‡∏•‡∏ö‡∏ö‡∏£‡∏¥‡∏ß‡∏≤‡∏à‡∏£‡∏≤‡∏Å‡∏≤‡∏£ Pilot Burner Pressure Regulator Assembly (1, Figure 3-1) ‡πÇ‡∏î‡∏¢‡∏ó‡∏≥‡∏ï‡∏≤‡∏°‡∏Ç‡∏±‡πâ‡∏ô‡∏ï‡∏≠‡∏ô berikut:
    a. ‡∏ñ‡∏π‡∏Å‡∏•‡∏ö body (14, Figure 3-2) ‡∏à‡∏≤‡∏Å control hous- ing. ‡∏™‡πà‡∏ß‡∏ô‡∏ó‡∏µ‡πà‡∏Ñ‡∏á‡∏≠‡∏¢‡∏π‡πà‡πÉ‡∏ô body (14) ‡∏Ñ‡