In [None]:
import pandas as pd
import concurrent.futures
import re
import json
from langchain_community.chat_models import ChatOllama 
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.prompts import PromptTemplate
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS
import chardet
import os
from pathlib import Path
from typing import List
from langchain.docstore.document import Document as LangDocument

# Root folder that is searched recursively for the *.js files to review
# (passed to run_first_js_review in the last cell). Machine-specific absolute path.
MASTER_JS = r"C:\users\tuf\documents\cloudysys_nickfury\dcmsln\dcmsln_201812\Master\PageJS\Query"
# Machine-specific absolute path; not referenced by any other visible cell.
# NOTE(review): presumably the VB source folder meant for collect_all_vb_chunks — confirm.
BUSINESSRULE_DIR = r"C:\Users\TUF\Documents\cloudysys_nickfury\dcmsln\BusinessRule\Commons"
# Folder where build_vector_store saves the FAISS index.
VECTOR_STORE_PATH = "./common/vectorstore"
# Model name handed to OllamaEmbeddings when building the vector store.
EMBEDDING_MODEL = "nomic-embed-text"


In [None]:
# RAG function
def remove_simplified_chinese(text: str) -> str:
    """Delete every character that is not on the whitelist in the pattern below.

    Kept: ASCII, the CJK range U+4E00-U+9FFF, anything the regex treats as a
    word or whitespace character, and the listed punctuation. All other
    characters are removed.

    Note: despite the name, Chinese ideographs are explicitly kept by the
    whitelist; only other symbols (e.g. full-width punctuation) are stripped.
    """
    not_whitelisted = re.compile(
        r"[^\x00-\x7F\u4e00-\u9fff\n\r\t\w\s.,:;!?(){}[\]\"'@#$%^&*\-+=\\/]"
    )
    return not_whitelisted.sub("", text)

def read_file_with_detected_encoding(file_path: str) -> str:
    """Print ``file_path``, then return the file's bytes decoded as GB2312.

    Bytes that are not valid GB2312 are dropped (``errors='ignore'``).

    Note: despite the name, no encoding detection is performed here; the
    codec is fixed.
    """
    print(file_path)

    with open(file_path, mode='rb') as handle:
        payload = handle.read()
    return payload.decode(encoding='GB2312', errors='ignore')

def extract_vb_functions(file_path: str) -> List[LangDocument]:
    """Split one VB source file into one document per Sub/Function.

    The file is read with ``read_file_with_detected_encoding`` and filtered
    through ``remove_simplified_chinese`` before matching.

    Args:
        file_path: path of the VB file to read.

    Returns:
        One ``LangDocument`` per matched routine. ``page_content`` is the
        stripped text from the (optional) ``Public``/``Private`` keyword up to
        the first following ``End Sub``/``End Function``; ``metadata`` holds
        the file's base name under ``"source"`` and the routine name under
        ``"function"``.
    """
    raw_code = read_file_with_detected_encoding(file_path)

    # Drop characters outside the whitelist applied by remove_simplified_chinese.
    cleaned_code = remove_simplified_chinese(raw_code)

    # The routine name is captured by the same case-insensitive pattern that
    # locates the routine. A separate case-sensitive search for the name would
    # miss lower-case `sub foo(` headers or pick up a token from the body.
    # `.` does not cross newlines, so the parameter list must sit on one line.
    pattern = (
        r"(?:(?:Public|Private)\s+)?(?:Sub|Function)\s+(?P<name>\w+)\s*\(.*?\)"
        r"[\s\S]+?End\s+(?:Sub|Function)"
    )
    file_name = Path(file_path).name

    return [
        LangDocument(
            page_content=match.group(0).strip(),
            metadata={
                "source": file_name,
                "function": match.group("name"),
            },
        )
        for match in re.finditer(pattern, cleaned_code, re.MULTILINE | re.IGNORECASE)
    ]
def collect_all_vb_chunks(directory: str) -> List[LangDocument]:
    """Walk ``directory`` recursively and gather the chunks of every ``.vb`` file.

    Each file whose name ends with ``.vb`` (case-sensitive) is passed to
    ``extract_vb_functions``; the resulting documents are concatenated in
    walk order.
    """
    collected = []
    for folder, _subdirs, names in os.walk(directory):
        vb_paths = (os.path.join(folder, name) for name in names if name.endswith(".vb"))
        for vb_path in vb_paths:
            collected.extend(extract_vb_functions(vb_path))
    return collected

def build_vector_store(docs: List[LangDocument]):
    """Embed ``docs`` with Ollama, index them in FAISS and save the index.

    The embedding model is ``EMBEDDING_MODEL`` and the index is written to
    ``VECTOR_STORE_PATH`` (created if missing). Progress is printed before
    and after.
    """
    print(f"[info] Building vectorstore with {len(docs)} code chunks...")
    embedder = OllamaEmbeddings(model=EMBEDDING_MODEL)
    store = FAISS.from_documents(docs, embedder)

    # Make sure the target folder exists before the index is written into it.
    os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
    store.save_local(VECTOR_STORE_PATH)
    print(f"[success] Vectorstore saved to: {VECTOR_STORE_PATH}")


In [None]:
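# Build the RAG vector store (uncomment to rebuild the index).
# NOTE(review): SOURCE_DIR is not defined in this notebook — presumably
# BUSINESSRULE_DIR is the intended folder; confirm before running.
# all_chunks = collect_all_vb_chunks(SOURCE_DIR)
# build_vector_store(all_chunks)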

In [None]:
# Schema of one refactoring proposal; the field names mirror the keys that
# fst_prompt asks the first LLM to return.
# NOTE(review): not referenced by any other visible cell (the JsonOutputParser
# below is created without it) — confirm whether it should be wired in.
class MessageClassification(BaseModel):
    line: int = Field(...)  # line number of the problem
    original: str = Field(...)  # original code
    issue: str = Field(...)  # description of the problem
    replacement: str = Field(...)  # rewritten code
    reason: str = Field(...)  # why the change is made

# JSON output parser and its format instructions.
# NOTE(review): neither name is used by the visible cells — confirm before removing.
parser = JsonOutputParser()
format_instructions = parser.get_format_instructions()

# Local models served through Ollama.
# The embedder takes its model name from EMBEDDING_MODEL so that it always
# matches the one used by build_vector_store.
embed_model = OllamaEmbeddings(model=EMBEDDING_MODEL)
fst_llm = ChatOllama(model="llama3:8B")  # agent 1: proposes rewrites
sec_llm = ChatOllama(model="mistral:7B")  # agent 2: reviews each proposal
third_llm = ChatOllama(model="phi3:3.8B")  # agent 3: checks the rewrite keeps the original logic before the vote

# Agent 1 prompt: asks the model to find outdated JavaScript in {code} and to
# answer with a JSON array of objects holding "line", "original", "issue",
# "replacement" and "reason" (an empty array when nothing needs changing).
fst_prompt = PromptTemplate.from_template(
  """
你是一位 JavaScript 專家，負責將使用過時、不推薦的語法重構為現代瀏覽器相容的寫法。

請分析以下 JavaScript 原始碼片段，並使用 JSON 格式輸出需要重構的部分與建議的新寫法：

=== 原始碼 ===
{code}
===

請回傳格式如下（如無需修改則回傳空陣列）：
[
  {{
    "line": <問題行號>,
    "original": "<原始碼>",
    "issue": "<問題說明>",
    "replacement": "<改寫後的程式碼>",
    "reason": "<為何這樣修改>"
  }}
]
  """
)
# Agent 2 prompt: asks the model to review the JSON proposals in {proposal}
# and to answer with a JSON array of objects holding "line", "vote"
# ("approve" or "reject") and "comment".
sec_prompt = PromptTemplate.from_template("""
你是一位 JavaScript Linter 工具的專家。

請根據下列 JSON 內容，審查其中每一項修改提案是否合理，並針對語法正確性、現代相容性與實務可行性進行審查。

=== 修改提案 ===
{proposal}
===

請回傳以下 JSON 陣列，每個元素為一條審查意見：
[
  {{
    "line": <提案行號>,
    "vote": "approve" | "reject",
    "comment": "<說明你的理由>"
  }}
]
""" 
)
# Agent 3 prompt: asks the model to compare {original} with {rewritten} and to
# answer with one JSON object holding "consistency" ("high", "medium" or
# "low") and "comment".
# NOTE(review): the requested reply is a single object, not an array, whereas
# extract_json only looks for a '[' ... ']' span — the consumer of this
# prompt must parse an object.
third_prompt = PromptTemplate.from_template("""
你是語意一致性檢查專家。

請比較以下原始碼與改寫碼，確認是否保留了相同的邏輯與功能。

=== 原始碼 ===
{original}

=== 改寫碼 ===
{rewritten}

請回傳以下格式：
{{
  "consistency": "high" | "medium" | "low",
  "comment": "<改寫是否符合原始語意？是否有任何遺漏？>"
}}
"""
)

# Placeholder: the template body is a single newline and the name is not
# referenced by any other visible cell. TODO: fill in or remove.
summary_prompt = PromptTemplate.from_template("""
""")

In [None]:
# 生成.js
def write_modified_js(original_path: str, original_code: str, approved_changes: List[dict]):
    """Apply the approved replacements and save the result beside the source file.

    Args:
        original_path: path of the reviewed file; the output is written to
            ``<stem>_modified<suffix>`` in the same folder.
        original_code: text the replacements are applied to.
        approved_changes: dicts with string ``"original"`` and
            ``"replacement"`` entries. Every occurrence of ``"original"`` is
            replaced. Entries that are not dicts, lack either key, hold
            non-string values, or have an empty ``"original"`` are skipped.

    Returns:
        Path of the file that was written.
    """
    modified_code = original_code
    for item in approved_changes or []:
        if not isinstance(item, dict):
            continue
        target = item.get("original")
        substitute = item.get("replacement")
        # An empty target must be skipped: str.replace("", x) would insert x
        # between every character of the file.
        if not isinstance(target, str) or not target or not isinstance(substitute, str):
            continue
        modified_code = modified_code.replace(target, substitute)

    p = Path(original_path)
    output_path = p.with_name(f"{p.stem}_modified{p.suffix}")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(modified_code)
    return output_path

# 審核json
def extract_json(text: str) -> list:
    """Extract the JSON array embedded in an LLM reply.

    The span from the first ``[`` to the last ``]`` is parsed as-is first, so
    valid JSON whose strings contain backticks (e.g. JavaScript template
    literals) is left untouched. Only if that fails are the repairs for common
    LLM formatting mistakes applied and the parse retried.

    Args:
        text: raw reply text.

    Returns:
        The parsed list, or ``[]`` when ``text`` is not a string, contains no
        ``[...]`` span, or cannot be parsed even after the repairs.
    """
    if not isinstance(text, str):
        print("找不到 JSON 陣列")
        return []

    # Locate the first "[" and the last "]".
    start = text.find('[')
    end = text.rfind(']') + 1

    if start == -1 or end <= start:
        print("找不到 JSON 陣列")
        return []

    json_str = text[start:end]

    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        pass  # fall through to the repair attempt below

    # Repair 1: backticks used as quotes (LLM mixing Markdown into the output).
    repaired = json_str.replace("`", "\"")

    # Repair 2: an empty replacement followed by a parenthesised remark,
    # e.g. "replacement": "" (xxx)
    repaired = re.sub(r'("replacement"\s*:\s*)""\s*\(.*?\)', r'\1""', repaired)

    try:
        return json.loads(repaired)
    except json.JSONDecodeError as e:
        print(f"\nJSON 解析失敗：{e}")
        print("原始輸出：", text)
        return []

# 呼叫LLM
def call_llm(llm, prompt):
    """Invoke ``llm`` with ``prompt`` and return the reply's ``content``.

    When the reply object has no ``content`` attribute, the reply itself is
    returned.
    """
    reply = llm.invoke(prompt)
    return getattr(reply, "content", reply)

# ====== Main LLM Logic ======
def analyze_message_with_multi_llm(code: str):
    """Run the three-agent review over one JavaScript source text.

    Agent 1 (``fst_llm``) proposes rewrites as a JSON array. Each proposal is
    then reviewed by agent 2 (``sec_llm``, replies with a JSON array of votes)
    and by agent 3 (``third_llm``, replies with a single JSON object). A
    proposal is approved only when agent 2 votes ``"approve"`` and agent 3
    reports ``"high"`` or ``"medium"`` consistency.

    Args:
        code: the JavaScript source to analyze.

    Returns:
        ``{"status": "no-change", "original": code}`` when agent 1 yields no
        proposals; otherwise ``{"status": "done", "original": code,
        "approved": [...]}`` with the approved proposal dicts.
    """
    # Agent 1: propose rewrites.
    fst_input = fst_prompt.format(code=code)
    fst_response = call_llm(fst_llm, fst_input)
    proposals = extract_json(fst_response)

    # Show the feedback.
    print("fst_output:", proposals)

    if not proposals:
        return {"status": "no-change", "original": code}

    approved_results = []

    for p in proposals:
        # LLM output is untrusted: skip entries that lack usable
        # "original"/"replacement" strings instead of raising KeyError.
        if (
            not isinstance(p, dict)
            or not isinstance(p.get("original"), str)
            or not p["original"]
            or not isinstance(p.get("replacement"), str)
        ):
            print("略過格式不完整的提案：", p)
            continue

        # Agent 2: syntax review.
        sec_input = sec_prompt.format(proposal=json.dumps([p], ensure_ascii=False))
        sec_result = extract_json(call_llm(sec_llm, sec_input))
        # Show the feedback.
        print("sec_result:", sec_result)

        # Agent 3: semantic consistency. third_prompt requests a JSON object,
        # so it is parsed as an object rather than with the array-only
        # extract_json.
        third_input = third_prompt.format(
            original=p["original"],
            rewritten=p["replacement"]
        )
        third_result = _parse_json_object(call_llm(third_llm, third_input))
        # Show the feedback.
        print("third_result:", third_result)

        # Combine the votes.
        syntax_ok = (
            bool(sec_result)
            and isinstance(sec_result[0], dict)
            and sec_result[0].get("vote") == "approve"
        )
        semantics_ok = third_result.get("consistency") in ("high", "medium")

        if syntax_ok and semantics_ok:
            approved_results.append(p)

    # "original" is included so callers can read it for either status.
    return {"status": "done", "original": code, "approved": approved_results}


def _parse_json_object(text) -> dict:
    """Return the JSON object spanning the first '{' to the last '}' in ``text``, or ``{}``."""
    if not isinstance(text, str):
        return {}
    start = text.find('{')
    end = text.rfind('}') + 1
    if start == -1 or end <= start:
        return {}
    try:
        parsed = json.loads(text[start:end])
    except json.JSONDecodeError:
        return {}
    return parsed if isinstance(parsed, dict) else {}

# ====== Folder Runner ======

def run_folder_review(folder_path: str):
    """Review every ``*.js`` file under ``folder_path`` (recursively).

    Each file is read as UTF-8 and analyzed with
    ``analyze_message_with_multi_llm``. When at least one change is approved,
    the rewritten text is saved through ``write_modified_js``; otherwise a
    "no change needed" message is printed.

    Args:
        folder_path: root folder to search.
    """
    # Skip files named like write_modified_js output so that a re-run does not
    # review its own results.
    js_files = [
        path
        for path in Path(folder_path).rglob("*.js")
        if not path.stem.endswith("_modified")
    ]
    print(f"共發現 {len(js_files)} 個 JS 檔案")

    for file_path in js_files:
        print(f"\n 分析：{file_path}")
        with open(file_path, "r", encoding="utf-8") as f:
            code = f.read()

        result = analyze_message_with_multi_llm(code)

        # Apply the changes to the text read above: the "done" result is not
        # guaranteed to carry an "original" entry.
        if result.get("status") == "done" and result.get("approved"):
            write_modified_js(file_path, code, result["approved"])
        else:
            print(f"無需修改：{file_path}")

# 單一個js FOR測試
def run_first_js_review(folder_path: str):
    """Review only the first ``*.js`` file found under ``folder_path``.

    The file is read as UTF-8 and analyzed with
    ``analyze_message_with_multi_llm``. When at least one change is approved,
    the rewritten text is saved through ``write_modified_js``; otherwise a
    "no change needed" message is printed.

    Args:
        folder_path: root folder to search recursively.
    """
    # Skip files named like write_modified_js output so that a re-run does not
    # pick up a previously generated file.
    js_files = [
        path
        for path in Path(folder_path).rglob("*.js")
        if not path.stem.endswith("_modified")
    ]
    if not js_files:
        print("找不到任何 JS 檔案")
        return

    first_file = js_files[0]
    print(f"正在分析第一個 JS 檔案：{first_file}")

    with open(first_file, "r", encoding="utf-8") as f:
        code = f.read()

    result = analyze_message_with_multi_llm(code)

    # Apply the changes to the text read above: the "done" result is not
    # guaranteed to carry an "original" entry.
    if result.get("status") == "done" and result.get("approved"):
        write_modified_js(first_file, code, result["approved"])
    else:
        print(f"無需修改：{first_file}")


In [None]:
# Test run: review only the first .js file found under MASTER_JS.
run_first_js_review(MASTER_JS)