In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pandas as pd
import json

In [4]:
# from contextEnrichment_function.gpt_contextualized import gpt_batch_process
from contextEnrichment_function.anthropic_contextualized import anthropic_batch_process

In [6]:
def pipeline_contextEnrichment(prepared_data_dir):
    """Run context enrichment on every machine chunk file under ``prepared_data_dir``.

    Files read by this function::

        <prepared_data_dir>/plant_list.csv
            columns used: PLANT_TAG, PLANT_NAME
        <prepared_data_dir>/<PLANT_TAG>/machine_list.csv
            columns used: MACHINE_TAG, MACHINE_NAME
        <prepared_data_dir>/<PLANT_TAG>/<MACHINE_TAG>/<PLANT_TAG>_<MACHINE_TAG>_chunks.json

    Each chunk file that exists is parsed as JSON, handed to
    ``anthropic_batch_process``, and whatever that call returns is written as
    JSON next to the input, with ``_contextualized`` inserted before the file
    extension (``..._chunks_contextualized.json``).

    Missing files are reported with ``print`` instead of raising:

    * missing ``plant_list.csv``   -> the function returns immediately;
    * missing ``machine_list.csv`` -> that plant is skipped, later plants still run;
    * missing chunk file           -> that machine is skipped.

    Args:
        prepared_data_dir: Path of the directory that holds ``plant_list.csv``
            and the per-plant sub-directories.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    # Read the list of plants from the CSV file.
    plant_list_file = os.path.join(prepared_data_dir, "plant_list.csv")
    if not os.path.isfile(plant_list_file):
        print(":: Failed ❌")
        print(f"File not found: {plant_list_file}")
        return  # nothing can be processed without the plant list

    plant_df = pd.read_csv(plant_list_file)

    # Loop over every plant.
    for _, plant_row in plant_df.iterrows():
        plant_tag = plant_row["PLANT_TAG"]
        plant_name = plant_row["PLANT_NAME"]

        # Read the list of machines that belong to this plant.
        machine_list_file = os.path.join(
            prepared_data_dir, plant_tag, "machine_list.csv"
        )
        if not os.path.isfile(machine_list_file):
            print(":: Failed ❌")
            print(f"File not found: {machine_list_file}")
            # Skip only this plant; one missing machine list must not stop
            # the remaining plants from being processed.
            continue

        machine_df = pd.read_csv(machine_list_file)

        # Loop over every machine of the plant.
        for idx, machine_row in machine_df.iterrows():
            machine_tag = machine_row["MACHINE_TAG"]
            machine_name = machine_row["MACHINE_NAME"]
            print("\n" + "=" * 100)
            print(
                f"#{idx+1} Processing data for {plant_name} (TAG: {plant_tag}) - {machine_name} (TAG: {machine_tag})"
            )
            print("=" * 100)

            # Add context to each content chunk of this machine.
            file_name = f"{plant_tag}_{machine_tag}_chunks.json"
            print(f"\n>> Add contextualization to chunk content - File: {file_name}")
            chunks_file = os.path.join(
                prepared_data_dir, plant_tag, machine_tag, file_name
            )
            if os.path.isfile(chunks_file):
                # Read the JSON file
                with open(chunks_file, "r", encoding="utf-8") as file:
                    chunks_data = json.load(file)
                print(f":: Read the Chunks data JSON Complete ✔️ - Path: {chunks_file}")

                # Call the Anthropic-backed helper to generate the context.
                contextualized_data = anthropic_batch_process(chunks_data)

                # Build the output path from the file extension only, so a
                # ".json" substring elsewhere in the path (e.g. in a directory
                # name) is never rewritten.
                path_stem, path_ext = os.path.splitext(chunks_file)
                contextualized_full_file_path = f"{path_stem}_contextualized{path_ext}"

                # Write the processed data back to disk.
                with open(contextualized_full_file_path, "w", encoding="utf-8") as file:
                    json.dump(contextualized_data, file, ensure_ascii=False, indent=2)
                print(":: Writing processed data Complete ✔️")

            else:
                print(":: Failed ❌")
                print(f"File not found: {chunks_file}")

            # Closing banner, printed once per machine (also after a failure).
            print("=" * 100)
            print("Complete all Process")
            print("=" * 100)

In [7]:
# Root directory of the project.
# Raw string literal: the Windows path contains backslashes ("\D", "\E") that a
# normal string literal would treat as (invalid) escape sequences.
ROOT_DIRECTORY = r"D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications"

# Names of the sub-folders.
PROJECT_DIRECTORY = "predictive-maintenance-chatbot"    # project folder
DATA_ROOT_DIRECTORY = "data"                            # top-level data folder
PREPARED_DATA_DIRECTORY = "prepared_data"               # folder holding the prepared data

# Build the full path to the prepared-data folder.
prepared_data_dir = os.path.join(
    ROOT_DIRECTORY, PROJECT_DIRECTORY, DATA_ROOT_DIRECTORY, PREPARED_DATA_DIRECTORY
)

# Call the pipeline_contextEnrichment function
pipeline_contextEnrichment(prepared_data_dir)


#1 Processing data for Natural Gas Processing Plant (TAG: PLANT_01) - Sale Gas Compressor (TAG: COMP_SG01)

>> Add contextualization to chunk content - File: PLANT_01_COMP_SG01_chunks.json
:: Read the Chunks data JSON Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_01\COMP_SG01\PLANT_01_COMP_SG01_chunks.json
:: Read the Corpus source Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_01\COMP_SG01\PLANT_01_COMP_SG01_corpus.txt


:: Processing chunks: 100%|██████████| 178/178 [1:09:57<00:00, 23.58s/it]


:: Writing processed data Complete ✔️
Complete all Process

#1 Processing data for Everflow Utility Plant (TAG: PLANT_02) - Dual Fuel Generator A (TAG: GEN_DF_01)

>> Add contextualization to chunk content - File: PLANT_02_GEN_DF_01_chunks.json
:: Read the Chunks data JSON Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\GEN_DF_01\PLANT_02_GEN_DF_01_chunks.json
:: Read the Corpus source Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\GEN_DF_01\PLANT_02_GEN_DF_01_corpus.txt


:: Processing chunks: 100%|██████████| 59/59 [16:41<00:00, 16.97s/it]   


:: Writing processed data Complete ✔️
Complete all Process

#2 Processing data for Everflow Utility Plant (TAG: PLANT_02) - Dual Fuel Generator B (TAG: GEN_DF_02)

>> Add contextualization to chunk content - File: PLANT_02_GEN_DF_02_chunks.json
:: Read the Chunks data JSON Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\GEN_DF_02\PLANT_02_GEN_DF_02_chunks.json
:: Read the Corpus source Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\GEN_DF_02\PLANT_02_GEN_DF_02_corpus.txt


:: Processing chunks: 100%|██████████| 59/59 [07:47<00:00,  7.93s/it]


:: Writing processed data Complete ✔️
Complete all Process

#3 Processing data for Everflow Utility Plant (TAG: PLANT_02) - Produce Water Injection Pump A (TAG: PMP_WI_01)

>> Add contextualization to chunk content - File: PLANT_02_PMP_WI_01_chunks.json
:: Read the Chunks data JSON Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\PMP_WI_01\PLANT_02_PMP_WI_01_chunks.json
:: Read the Corpus source Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\PMP_WI_01\PLANT_02_PMP_WI_01_corpus.txt


:: Processing chunks: 100%|██████████| 10/10 [00:17<00:00,  1.75s/it]


:: Writing processed data Complete ✔️
Complete all Process

#4 Processing data for Everflow Utility Plant (TAG: PLANT_02) - Produce Water Injection Pump B (TAG: PMP_WI_02)

>> Add contextualization to chunk content - File: PLANT_02_PMP_WI_02_chunks.json
:: Read the Chunks data JSON Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\PMP_WI_02\PLANT_02_PMP_WI_02_chunks.json
:: Read the Corpus source Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\PMP_WI_02\PLANT_02_PMP_WI_02_corpus.txt


:: Processing chunks: 100%|██████████| 10/10 [00:15<00:00,  1.54s/it]


:: Writing processed data Complete ✔️
Complete all Process

#5 Processing data for Everflow Utility Plant (TAG: PLANT_02) - Produce Water Injection Pump C (TAG: PMP_WI_03)

>> Add contextualization to chunk content - File: PLANT_02_PMP_WI_03_chunks.json
:: Read the Chunks data JSON Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\PMP_WI_03\PLANT_02_PMP_WI_03_chunks.json
:: Read the Corpus source Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\PMP_WI_03\PLANT_02_PMP_WI_03_corpus.txt


:: Processing chunks: 100%|██████████| 10/10 [00:17<00:00,  1.73s/it]


:: Writing processed data Complete ✔️
Complete all Process

#6 Processing data for Everflow Utility Plant (TAG: PLANT_02) - Water process (TAG: SYS_WP_01)

>> Add contextualization to chunk content - File: PLANT_02_SYS_WP_01_chunks.json
:: Read the Chunks data JSON Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\SYS_WP_01\PLANT_02_SYS_WP_01_chunks.json
:: Read the Corpus source Complete ✔️ - Path: D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\SYS_WP_01\PLANT_02_SYS_WP_01_corpus.txt


:: Processing chunks: 100%|██████████| 8/8 [00:13<00:00,  1.67s/it]

:: Writing processed data Complete ✔️
Complete all Process





In [23]:
# data_json = r"D:\Data_sci_internship\Exploring Generative AI for Predictive Maintenance Applications\predictive-maintenance-chatbot\data\prepared_data\PLANT_02\SYSTEM\PLANT_02_SYSTEM_chunks_contextualized.json"
# with open(data_json, "r", encoding="utf-8") as file:
#     contextualized_data = json.load(file)
#     print(":: Complete ✔️")

# for data in contextualized_data:
#     chunks = data["chunks"]
#     corpus_source_file = data["corpus_source"]

#     if os.path.isfile(corpus_source_file):
#         with open(corpus_source_file, "r") as file:
#             corpus_content = file.read()
#             print(f":: Read the Corpus source Complete ✔️ - Path: {corpus_source_file}")
#     else:
#         print(f":: Failed ❌ - File not found: {corpus_source_file}")

#     for chunk in chunks:
#         chunk_content = chunk["content"]
#         contextualized_content = chunk["contextualized_content"]
#         print("=" * 100)
#         print(contextualized_content)

---
