In [1]:
import fitz  # PyMuPDF
import json

pdf_path = "AFTERBURNER CONTROL_Chapter 3.pdf"

# Collect the plain text of every page together with its 1-based page number,
# so that later retrieval hits can be traced back to a page of the source PDF.
# The document is opened as a context manager so the underlying file handle is
# released even if text extraction raises part-way through.
with fitz.open(pdf_path) as doc:
    page_data = [
        {"page": page_number, "text": page.get_text()}
        for page_number, page in enumerate(doc, start=1)
    ]

# Persist the extracted pages as JSON; ensure_ascii=False writes non-ASCII
# characters as-is instead of \uXXXX escapes.
with open("extracted_text.json", "w", encoding="utf-8") as f:
    json.dump(page_data, f, ensure_ascii=False, indent=2)

print(f"‚úÖ Extracted {len(page_data)} pages.")


‚úÖ Extracted 22 pages.


In [None]:
import json
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load the per-page records ({"page": ..., "text": ...}) from the JSON file.
with open("extracted_text.json", "r", encoding="utf-8") as f:
    pages = json.load(f)

# Splitter configuration: chunks of at most 500 units with a 50-unit overlap
# between neighbouring chunks (units are whatever the splitter's length
# function counts; presumably characters with the default settings).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)

# Build one Document per chunk, tagged with the page it came from.
# A comprehension is used instead of a loop variable named `doc`, so this cell
# no longer rebinds the notebook-global `doc` (the PDF handle from the
# extraction cell) to an unrelated object.
docs = [
    Document(page_content=chunk, metadata={"page": page["page"]})
    for page in pages
    for chunk in text_splitter.split_text(page["text"])
]

print(f"‚úÖ Total chunks: {len(docs)}")


‚úÖ Total chunks: 82


In [4]:
# Import from langchain_community, matching the later cells of this notebook;
# the `langchain.vectorstores` / `langchain.embeddings` paths are legacy
# locations for the same classes.
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

# multilingual model for Thai
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# FAISS (Facebook AI Similarity Search): embed every chunk in `docs` and build
# the vector index from them.
db = FAISS.from_documents(docs, embedding_model)

# Persist the index to a local folder so later cells can reload it.
db.save_local("faiss_afterburner_index")

print("‚úÖ FAISS vectorstore saved.")


  embedding_model = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


‚úÖ FAISS vectorstore saved.


In [6]:
# Reopen the index that was saved to disk, using the same embedding model.
# NOTE: allow_dangerous_deserialization=True opts in to pickle-based loading;
# only use it on index folders you created yourself.
db = FAISS.load_local("faiss_afterburner_index", embedding_model, allow_dangerous_deserialization=True)

# Smoke-test query about the metering valve; fetch the two closest chunks.
query = "‡∏Å‡∏≤‡∏£‡∏ñ‡∏≠‡∏î metering valve ‡∏ó‡∏≥‡∏≠‡∏¢‡πà‡∏≤‡∏á‡πÑ‡∏£"
results = db.similarity_search(query, k=2)

separator = "-" * 80
for rank, hit in enumerate(results, start=1):
    # Header shows the hit's rank and the page recorded in its metadata.
    print(f"üîé Chunk {rank} (‡∏´‡∏ô‡πâ‡∏≤ {hit.metadata['page']}):")
    preview = hit.page_content[:500]
    print(preview)
    print(separator)


üîé Chunk 1 (‡∏´‡∏ô‡πâ‡∏≤ 15):
Remove bypass valve (11) and insert (9) only if
screw (84) and preformed packings (86, 87). Break away
clogged or damaged. Remove standpipe (14)
torque shall be 2.0 pound-inch minimum when removing
and insert (15) only if damaged. Breakaway
linkage trim screw. Replace insert (15, Figure 3-15) if
torque shall be 4.0 pound-inch minimum when
breakaway torque is below limit.
removing bypass valve (11). Replace insert (9)
if breakaway torque is below limit.
3.20
REMOVAL OF NOZZLE CONTROL LIMIT
--------------------------------------------------------------------------------
üîé Chunk 2 (‡∏´‡∏ô‡πâ‡∏≤ 12):
pull out spacers (38) from bore of linkage
Exercise care not to overstretch the bellows
bracket.
while removing, as permanent deformation will
(11)
Carefully slide pin (40) out of the shaft of
occur.
sensor bellows (45); then remove screws (41).
(13)
Remove sensor bellows. After pin (Figure 3-9,
Removal torque for screws (41) shall be 1.5
40) has been removed

In [7]:
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA


# Create the LLM wrapper for the "mistral" model via Ollama; this `llm` object
# is what the RetrievalQA chain further down is built with.
# NOTE(review): presumably needs a running Ollama server with the model
# already pulled; confirm before a fresh Restart & Run All.
llm = Ollama(model="mistral")


  llm = Ollama(model="mistral")


In [None]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings


# Recreate the embedding model and reopen the saved FAISS index using the
# langchain_community classes imported just above.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)
# allow_dangerous_deserialization=True opts in to pickle-based loading; only
# use it on index folders you created yourself.
db = FAISS.load_local(
    "faiss_afterburner_index",
    embedding_model,
    allow_dangerous_deserialization=True,
)

# Retrieval-based QA chain: built from the `llm` defined earlier and a
# retriever over the FAISS index.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(),
)


In [17]:
question = "‡∏Ç‡∏±‡πâ‡∏ô‡∏ï‡∏≠‡∏ô‡∏ñ‡∏≠‡∏î pilot burner regulator ‡∏ï‡πâ‡∏≠‡∏á‡∏ó‡∏≥‡∏≠‡∏¢‡πà‡∏≤‡∏á‡πÑ‡∏£"

# Wrap the question in an instruction preamble, followed by the quoted question
# on its own line.
# NOTE(review): the whole prompt (preamble + question) is the chain's only
# input, so the preamble presumably also takes part in retrieval; consider
# moving it into a prompt template on the chain instead.
prompt = f"""‡∏Ñ‡∏∏‡∏ì‡∏Ñ‡∏∑‡∏≠‡∏ú‡∏π‡πâ‡∏ä‡πà‡∏ß‡∏¢‡∏î‡πâ‡∏≤‡∏ô‡πÄ‡∏ó‡∏Ñ‡∏ô‡∏¥‡∏Ñ‡∏†‡∏≤‡∏©‡∏≤‡πÑ‡∏ó‡∏¢ ‡∏ä‡πà‡∏ß‡∏¢‡∏ï‡∏≠‡∏ö‡∏Ñ‡∏≥‡∏ñ‡∏≤‡∏°‡∏ï‡πà‡∏≠‡πÑ‡∏õ‡∏ô‡∏µ‡πâ‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏Å‡∏£‡∏∞‡∏ä‡∏±‡∏ö:
\"{question}\""""

# `Chain.run` is deprecated; `invoke` takes the chain's input mapping and
# returns a mapping whose "result" entry holds the answer text.
response = qa_chain.invoke({"query": prompt})["result"]

print(response)


‡πÄ‡∏û‡∏∑‡πà‡∏≠‡∏ñ‡∏≠‡∏î Pilot Burner Regulator, ‡∏Ñ‡∏∏‡∏ì‡∏ï‡πâ‡∏≠‡∏á‡∏ó‡∏≥‡∏ï‡∏≤‡∏°‡∏Ç‡∏±‡πâ‡∏ô‡∏ï‡∏≠‡∏ô berikut:
1. ‡∏ã‡πà‡∏≠‡∏°‡πÉ‡∏´‡πâ‡∏™‡∏∞‡∏≠‡∏≤‡∏î‡∏™‡πà‡∏ß‡∏ô‡πÅ‡∏¢‡∏Å‡∏Å‡∏±‡∏ô‡πÇ‡∏î‡∏¢‡πÉ‡∏ä‡πâ‡∏™‡∏£‡∏µ‡πÄ‡∏õ‡∏¥‡∏î-cleaner P-D-680, Type II or III.
2. ‡∏ñ‡∏π‡∏Å‡πÑ‡∏õ‡∏Ñ‡∏∑‡∏ô‡πÅ‡∏•‡∏∞‡πÅ‡∏î‡∏Å‡∏ó‡∏∏‡∏Å‡∏™‡πà‡∏ß‡∏ô‡∏ó‡∏µ‡πà‡∏≠‡∏¢‡∏π‡πà‡∏ö‡∏ô‡∏†‡∏≤‡∏û 3-1 ‡∏ï‡∏≤‡∏°‡∏ï‡∏±‡∏ß‡∏≠‡∏ò‡∏¥‡∏ö‡∏≤‡∏¢:
   a. ‡∏ô‡∏≥‡∏•‡πá‡∏≠‡∏Ñ‡πÑ‡∏ß‡∏£‡∏¥‡∏á‡∏≠‡∏≠‡∏Å‡πÅ‡∏•‡πâ‡∏ß‡∏à‡∏±‡∏î‡∏ó‡∏≥‡πÉ‡∏´‡∏°‡πà‡πÇ‡∏î‡∏¢‡∏ó‡∏µ‡πà‡πÑ‡∏°‡πà‡πÉ‡∏ä‡πà‡∏Å‡∏≤‡∏£‡∏Ç‡∏±‡∏î‡∏•‡∏≠‡∏Å‡πÑ‡∏ß‡∏£‡∏¥‡∏á
   b. ‡∏ï‡∏±‡∏î‡∏™‡πà‡∏ß‡∏ô‡∏•‡πá‡∏≠‡∏Ñ‡πÑ‡∏ß‡∏£‡∏¥‡∏á‡∏≠‡∏≠‡∏Å
3. ‡πÅ‡∏Å‡πâ‡∏õ‡∏∏‡πà‡∏°‡∏´‡∏•‡∏±‡∏Å (14, Figure 3-2) ‡∏à‡∏≤‡∏Å‡∏ä‡πà‡∏≠‡∏ó‡∏µ‡πà‡∏ö‡∏≥‡∏Å‡∏∞‡πÄ‡∏†‡∏≤
   - ‡∏Ç‡∏ì‡∏∞‡∏ô‡∏µ‡πâ‡∏à‡∏∞‡πÄ‡∏´‡πá‡∏ô‡∏ß‡πà‡∏≤‡∏™‡πà‡∏ß‡∏ô‡∏•‡∏∂‡∏Å‡∏ô‡∏µ‡πâ‡∏°‡∏µ‡∏™‡πà‡∏ß‡∏ô‡∏ó‡∏µ‡πà‡πÅ‡∏õ‡∏•‡∏á‡∏ä‡∏∑‡πà‡∏≠ 14
4. ‡∏ñ‡∏π‡∏Å‡πÑ‡∏õ‡∏Ñ‡∏∑‡∏ô‡πÅ‡∏•‡πâ‡∏ß‡∏à‡∏±‡∏î‡∏ó‡∏≥‡∏†‡∏≤‡∏û 3-5
   a. ‡∏ô‡∏≥‡∏£‡∏≠‡πÄ‡∏ï‡∏≠‡∏£‡πå (2, Figure 3-5) ‡∏≠‡∏≠‡∏Å
 