In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import json
from RAG_function.build_index.bm25_build import bm25_build
from RAG_function.build_index.faiss_build import FAISS_build
from RAG_function.RRF import reciprocal_rank_fusion
from RAG_function.re_ranking import re_ranking
from RAG_function.generate.anthropicGenerate import anthropic_chat, ConversationHistory

In [3]:
# กำหนด root directory หลักของโปรเจค
ROOT_DIRECTORY = "D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications"

# กำหนดชื่อโฟลเดอร์ย่อยต่างๆ
PROJECT_DIRECTORY = "predictive-maintenance-chatbot"    # โฟลเดอร์โปรเจค
DATA_ROOT_DIRECTORY = "data"                            # โฟลเดอร์หลักสำหรับเก็บข้อมูล
PREPARED_DATA_DIRECTORY = "prepared_data"               # โฟลเดอร์สำหรับข้อมูลที่ประมวลผลแล้ว
BUILDING_ROOT_DIRECTORY = "building-knowledge-base"

## Indexing

Building Best Matching 25 (BM25)

In [4]:
BUILD_ROOT_DIRECTOR = "build_1"
INDEX_ROOT_DIRECTOR = "bm25_index"
CONTEX_DIRECTOR = "contextualized_index"

bm25Index_dir = os.path.join(
    ROOT_DIRECTORY,
    PROJECT_DIRECTORY,
    DATA_ROOT_DIRECTORY,
    BUILDING_ROOT_DIRECTORY,
    BUILD_ROOT_DIRECTOR,
    INDEX_ROOT_DIRECTOR,
    CONTEX_DIRECTOR,
)

# สร้างอินสแตนซ์ของคลาส
bm25_store = bm25_build()  
# โหลด BM25 model และ documents
bm25_store.load_bm25_store(bm25Index_dir)

Directory exists: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\building-knowledge-base\build_1\bm25_index\contextualized_index
Files in directory:
- BM25Model.pkl
Loaded BM25 model from D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\building-knowledge-base\build_1\bm25_index\contextualized_index\BM25Model.pkl
- documents.json
- tokenized_corpus.json
BM25 model and documents loaded successfully.


Building Facebook AI Similarity Search (FASS) 

In [5]:
BUILD_ROOT_DIRECTOR = "build_1"
INDEX_ROOT_DIRECTOR = "faiss_index"
CONTEX_DIRECTOR = "contextualized_index"

faissIndex_dir = os.path.join(
    ROOT_DIRECTORY,
    PROJECT_DIRECTORY,
    DATA_ROOT_DIRECTORY,
    BUILDING_ROOT_DIRECTORY,
    BUILD_ROOT_DIRECTOR,
    INDEX_ROOT_DIRECTOR,
    CONTEX_DIRECTOR,
)

# สร้างอินสแตนซ์ของคลาส
vector_store = FAISS_build()

# โหลด BM25 model และ documents
vector_store.load_vector_store(faissIndex_dir)

Directory exists: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\building-knowledge-base\build_1\faiss_index\contextualized_index
Build Root directory: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\building-knowledge-base\build_1
ModelHub directory found: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\building-knowledge-base\build_1\ModelHub
Models found: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\building-knowledge-base\build_1\ModelHub\models--sentence-transformers--all-mpnet-base-v2\snapshots\9a3225965996d404b775526de6dbfe85d3368642
Embeddings loaded successfully.
Embeddings are ready to use.
Loaded vector store from D:\Data_sci_internship\Exploring Generative AI for Predictive M

## Search

BM25

In [6]:
query = "What is the current health score of the pump head for P-3410C?"
top_k = 10

print(f"Top {top_k} BM25 search:")
print("Query: ")
print(query)
print("=" * 50)

# ค้นหาเอกสารที่ตรงกับ query โดยใช้ BM25
top_bm25_documents_search = bm25_store.bestMatching_search(query, top_k)

SEARCH_RESULTS_DIRECTORY = os.path.join(
    ROOT_DIRECTORY, PROJECT_DIRECTORY, "search_results"
)
os.makedirs(SEARCH_RESULTS_DIRECTORY, exist_ok=True)
bm25Search_results_to_dir = os.path.join(
    SEARCH_RESULTS_DIRECTORY, "top_bm25_documents_search.json"
)

print(f"Top {top_k} BM25 search results:")

# แสดงผลลัพธ์การค้นหา
for i, doc in enumerate(top_bm25_documents_search):
    print("-" * 50)
    print(f"page_content: {doc['page_content']}")
    print(f"bm25_score: {doc['bm25_score']}")
    print(f"rank: {doc['rank']}")
    print(f"content_tokenizer: {doc['content_tokenizer']}")

# บันทึกผลลัพธ์การค้นหา
with open(bm25Search_results_to_dir, "w", encoding="utf-8") as f:
    json.dump(top_bm25_documents_search, f, ensure_ascii=False, indent=4)

print("=" * 50)
print("Save results BestMatching Search (BM25) to: ", bm25Search_results_to_dir)


Top 10 BM25 search:
Query: 
What is the current health score of the pump head for P-3410C?
Top 10 BM25 search results:
--------------------------------------------------
page_content: The PUMP component of the Produce Water Injection Pump C (Tag: P-3410C) is monitored by the Pump's Head (Dis Press - Suc. Press)(Pump head A) performance model, which indicates a 100.0% health score with consistent 0.12% daily, weekly, and monthly changes. This suggests the pump is operating within normal parameters and does not require immediate maintenance, though ongoing monitoring of the pump head is recommended to ensure continued reliable performance.

The PUMP component of Produce Water Injection Pump C (Tag: P-3410C) in the Everflow Utility Plant is monitored by the model Pump s Head (Dis Press - Suc. Press)(Pump head A), which is a PERFORMANCE model of INDIVIDUAL class. This model has a health score of 100.0%, with health changes recorded at 0.12% daily, 0.12% weekly, and 0.12% monthly.
bm25_scor

Vector

In [7]:
query = "What is the current health score of the pump head for P-3410C?"
top_k = 10

print(f"Top {top_k} BM25 search:")
print("Query: ")
print(query)
print("=" * 50)

# ค้นหาเอกสารที่ตรงกับ query โดยใช้ Vector Search
top_vector_documents_search = vector_store.vector_search(query, top_k)

SEARCH_RESULTS_DIRECTORY = os.path.join(
    ROOT_DIRECTORY, PROJECT_DIRECTORY, "search_results"
)
os.makedirs(SEARCH_RESULTS_DIRECTORY, exist_ok=True)
vectorSearch_results_to_dir = os.path.join(
    SEARCH_RESULTS_DIRECTORY, "top_vector_documents_search.json"
)

print(f"Top {top_k} Vector search results:")

# แสดงผลลัพธ์การค้นหา Vector
for i, doc in enumerate(top_vector_documents_search):
    print("-" * 50)
    print(f"page_content: {doc['page_content']}")
    print(f"distance_score: {doc['distance_score']}")
    print(f"rank: {doc['rank']}")

# บันทึกผลลัพธ์การค้นหา
with open(vectorSearch_results_to_dir, "w", encoding="utf-8") as f:
    json.dump(top_vector_documents_search, f, ensure_ascii=False, indent=4)

print("-" * 50)
print("Save results Vector Search (FAISS) to: ", vectorSearch_results_to_dir)


Top 10 BM25 search:
Query: 
What is the current health score of the pump head for P-3410C?
Top 10 Vector search results:
--------------------------------------------------
page_content: The PUMP component of the Produce Water Injection Pump C (Tag: P-3410C) is monitored by the Pump's Head (Dis Press - Suc. Press)(Pump head A) performance model, which indicates a 100.0% health score with consistent 0.12% daily, weekly, and monthly changes. This suggests the pump is operating within normal parameters and does not require immediate maintenance, though ongoing monitoring of the pump head is recommended to ensure continued reliable performance.

The PUMP component of Produce Water Injection Pump C (Tag: P-3410C) in the Everflow Utility Plant is monitored by the model Pump s Head (Dis Press - Suc. Press)(Pump head A), which is a PERFORMANCE model of INDIVIDUAL class. This model has a health score of 100.0%, with health changes recorded at 0.12% daily, 0.12% weekly, and 0.12% monthly.
distanc

## Rank Fusion

In [8]:
query = "What is the current health score of the pump head for P-3410C?"

top_k = 10
# ค้นหาเอกสารที่ตรงกับ query โดยใช้ BM25
top_bm25_documents_search = bm25_store.bestMatching_search(query, top_k)
# ค้นหาเอกสารที่ตรงกับ query โดยใช้ Vector Search
top_vector_documents_search = vector_store.vector_search(query, top_k)

# รวมผลลัพธ์การค้นหาจาก BM25 และ Vector Search
top_k = 7
top_rrf_documents = reciprocal_rank_fusion(top_vector_documents_search, top_bm25_documents_search, top_k)

SEARCH_RESULTS_DIRECTORY = os.path.join(
    ROOT_DIRECTORY, PROJECT_DIRECTORY, "search_results"
)
os.makedirs(SEARCH_RESULTS_DIRECTORY, exist_ok=True)
top_rrf_results_to_dir = os.path.join(
    SEARCH_RESULTS_DIRECTORY, "top_rrf_documents.json"
)

print(f"Top {top_k} Vector search results:")

# แสดงผลลัพธ์การจัดอันดับใหม่ด้วย Reciprocal rank fusion (RRF)
for i, doc in enumerate(top_rrf_documents):
    print("-" * 50)
    print(f"page_content: {doc['page_content']}")
    print(f"rrf_score: {doc['rrf_score']}")
    print(f"rank: {doc['rank']}")

# บันทึกผลลัพธ์การจัดอันดับใหม่ด้วย Reciprocal rank fusion (RRF)
with open(top_rrf_results_to_dir, "w", encoding="utf-8") as f:
    json.dump(top_rrf_documents, f, ensure_ascii=False, indent=4)

print("-" * 50)
print("Save results Reciprocal rank fusion (RRF) to: ", top_rrf_results_to_dir)


Top 7 Vector search results:
--------------------------------------------------
page_content: The PUMP component of the Produce Water Injection Pump C (Tag: P-3410C) is monitored by the Pump's Head (Dis Press - Suc. Press)(Pump head A) performance model, which indicates a 100.0% health score with consistent 0.12% daily, weekly, and monthly changes. This suggests the pump is operating within normal parameters and does not require immediate maintenance, though ongoing monitoring of the pump head is recommended to ensure continued reliable performance.

The PUMP component of Produce Water Injection Pump C (Tag: P-3410C) in the Everflow Utility Plant is monitored by the model Pump s Head (Dis Press - Suc. Press)(Pump head A), which is a PERFORMANCE model of INDIVIDUAL class. This model has a health score of 100.0%, with health changes recorded at 0.12% daily, 0.12% weekly, and 0.12% monthly.
rrf_score: 0.032786886
rank: 1
--------------------------------------------------
page_content: The

## re_ranking

In [10]:
query = "What is the current health score of the pump head for P-3410C?"

top_k = 10
# ค้นหาเอกสารที่ตรงกับ query โดยใช้ BM25
top_bm25_documents_search = bm25_store.bestMatching_search(query, top_k)
# ค้นหาเอกสารที่ตรงกับ query โดยใช้ Vector Search
top_vector_documents_search = vector_store.vector_search(query, top_k)
# รวมผลลัพธ์การค้นหาจาก BM25 และ Vector Search
top_k = 7
top_rrf_documents = reciprocal_rank_fusion(top_vector_documents_search, top_bm25_documents_search, top_k)

# คำนวณคะแนนใหม่โดยใช้ re-ranking model
top_k = 5
rerank_documents = re_ranking(top_rrf_documents, query, top_k=5)

SEARCH_RESULTS_DIRECTORY = os.path.join(
    ROOT_DIRECTORY, PROJECT_DIRECTORY, "search_results"
)
os.makedirs(SEARCH_RESULTS_DIRECTORY, exist_ok=True)
rerank_results_to_dir = os.path.join(
    SEARCH_RESULTS_DIRECTORY, "rerank_documents.json"
)

print(f"Top {top_k} Re-rank documents:")

# แสดงผลลัพธ์การจัดอันดับใหม่ด้วย Reciprocal rank fusion (RRF)
for i, doc in enumerate(rerank_documents):
    print("-" * 50)
    print(f"page_content: {doc['page_content']}")
    print(f"similarity_score: {doc['similarity_score']}")
    print(f"rank: {doc['rank']}")

# บันทึกผลลัพธ์การจัดอันดับใหม่ด้วย Reciprocal rank fusion (RRF)
with open(rerank_results_to_dir, "w", encoding="utf-8") as f:
    json.dump(rerank_documents, f, ensure_ascii=False, indent=4)

print("-" * 50)
print("Save results Re-rank documents to: ", rerank_results_to_dir)


Top 5 Re-rank documents:
--------------------------------------------------
page_content: The PUMP component of the Produce Water Injection Pump C (Tag: P-3410C) is monitored by the Pump's Head (Dis Press - Suc. Press)(Pump head A) performance model, which indicates a 100.0% health score with consistent 0.12% daily, weekly, and monthly changes. This suggests the pump is operating within normal parameters and does not require immediate maintenance, though ongoing monitoring of the pump head is recommended to ensure continued reliable performance.

The PUMP component of Produce Water Injection Pump C (Tag: P-3410C) in the Everflow Utility Plant is monitored by the model Pump s Head (Dis Press - Suc. Press)(Pump head A), which is a PERFORMANCE model of INDIVIDUAL class. This model has a health score of 100.0%, with health changes recorded at 0.12% daily, 0.12% weekly, and 0.12% monthly.
similarity_score: 0.93525803
rank: 1
--------------------------------------------------
page_content: T


## Generation

In [17]:
conversation_history = ConversationHistory()

# ทดสอบคำถามแรก
query = "What is the current health score of the pump head for P-3410C?"
response = anthropic_chat(
    rerank_documents=rerank_documents,
    query=query,
    conversation_history=conversation_history

)


=== Chat Information ===

1. Query:
What is the current health score of the pump head for P-3410C?

2. Conversation History:
[Empty History]

3. Augmented Documents:
<monitoring_data>
The PUMP component of the Produce Water Injection Pump C (Tag: P-3410C) is monitored by the Pump's Head (Dis Press - Suc. Press)(Pump head A) performance model, which indicates a 100.0% health score with consistent 0.12% daily, weekly, and monthly changes. This suggests the pump is operating within normal parameters and does not require immediate maintenance, though ongoing monitoring of the pump head is recommended to ensure continued reliable performance.

The PUMP component of Produce Water Injection Pump C (Tag: P-3410C) in the Everflow Utility Plant is monitored by the model Pump s Head (Dis Press - Suc. Press)(Pump head A), which is a PERFORMANCE model of INDIVIDUAL class. This model has a health score of 100.0%, with health changes recorded at 0.12% daily, 0.12% weekly, and 0.12% monthly.
</monito

In [18]:
print("User: ")
print(query)
print("AI Chat: ")
print(response[0].text)

User: 
What is the current health score of the pump head for P-3410C?
AI Chat: 
The current health score of the pump head for P-3410C is 100.0%. The monitoring data indicates the Pump's Head (Dis Press - Suc. Press)(Pump head A) performance model is showing consistent 0.12% daily, weekly, and monthly changes, suggesting the pump is operating within normal parameters.


In [19]:
query2 = "Is there any concerning trend in the motor's drive voltage that requires attention?"
response2 = anthropic_chat(
    rerank_documents=rerank_documents,
    query=query2,
    conversation_history=conversation_history

)


=== Chat Information ===

1. Query:
Is there any concerning trend in the motor's drive voltage that requires attention?

2. Conversation History:
user: What is the current health score of the pump head for P-3410C?
assistant: The current health score of the pump head for P-3410C is 100.0%. The monitoring data indicates the Pump's Head (Dis Press - Suc. Press)(Pump head A) performance model is showing consistent 0.12% daily, weekly, and monthly changes, suggesting the pump is operating within normal parameters.

3. Augmented Documents:
<monitoring_data>
The PUMP component of the Produce Water Injection Pump C (Tag: P-3410C) is monitored by the Pump's Head (Dis Press - Suc. Press)(Pump head A) performance model, which indicates a 100.0% health score with consistent 0.12% daily, weekly, and monthly changes. This suggests the pump is operating within normal parameters and does not require immediate maintenance, though ongoing monitoring of the pump head is recommended to ensure continued 

In [20]:
print("User: ")
print(query2)
print("AI Chat: ")
print(response2[0].text)

User: 
Is there any concerning trend in the motor's drive voltage that requires attention?
AI Chat: 
Based on the equipment monitoring data provided:

The Speed (Frequency x 60) model, which is a PERFORMANCE model of MACHINE_LEARNING class, indicates a potential issue with the pump's speed or efficiency. This model shows a significant performance degradation, with residual changes of -2.99% daily, -19.57% weekly, and -36.89% monthly.

This concerning trend in the pump's speed suggests the following:

1. Immediate Action Required:
   - Further investigate the cause of the speed/efficiency degradation, as this could lead to potential pump failure if left unaddressed.
   - Schedule a maintenance inspection to diagnose the issue and determine appropriate corrective actions.

2. Preventive Measures:
   - Implement a condition-based maintenance program to closely monitor the pump's speed and efficiency trends.
   - Consider adjusting the pump's operating parameters or performing preventive m