In [None]:
import re
import json
from langchain_community.chat_models import ChatOllama 
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.prompts import PromptTemplate
from langchain.embeddings import OllamaEmbeddings
from langchain.agents import initialize_agent, AgentType
import chardet
from pathlib import Path
from typing import List
from langchain.docstore.document import Document as LangDocument
from rag import * 
from prompt import *
import autogen

# JavaScript file under review, resolved relative to the current working directory.
# Built with pathlib so the value is a valid path on every OS; the previous
# raw ".\" prefix only works as a directory separator on Windows.
MASTER_JS = str(Path("MG0900S.js"))

In [None]:
# 建立RAG
# all_chunks = collect_all_vb_chunks(SOURCE_DIR)
# build_vector_store(all_chunks)

In [None]:
# Schema with five required string fields. The field names match the keys that
# code_linting_report requires on every proposal.
# NOTE(review): this class is not referenced anywhere else in the visible
# notebook — confirm whether it is meant to be passed to the output parser.
class MessageClassification(BaseModel):
    line: str = Field(...)  # presumably the source line the proposal refers to — TODO confirm
    original: str = Field(...)  # same key write_modified_js searches for in the source text
    issue: str = Field(...)  # presumably a description of the detected problem — TODO confirm
    replacement: str = Field(...)  # same key write_modified_js substitutes for `original`
    reason: str = Field(...)  # presumably the justification for the change — TODO confirm

# JSON output-format parser
# NOTE(review): no schema is passed to the parser here, so these are its generic
# format instructions — confirm that is intended.
parser = JsonOutputParser()
format_instructions = parser.get_format_instructions()

# Use local LLM models (served through Ollama)
embed_model = OllamaEmbeddings(model="nomic-embed-text") 
fst_llm = ChatOllama(model="llama3:8B") # proposes rewrites
sec_llm = ChatOllama(model="mistral:7B") # reviews the proposals
third_llm = ChatOllama(model="phi3:3.8B") # checks whether a change preserves the original logic; if it does, triggers the vote


In [None]:
from langchain.tools import Tool
from json_repair import repair_json
# Build the tools
def extract_json(text: str) -> str:
    """Extract the outermost JSON array from free-form text and pretty-print it.

    The span from the first '[' to the last ']' is taken as the candidate.
    It is parsed strictly first, so well-formed JSON (including non-ASCII text
    and escaped quotes) is returned untouched. Only when strict parsing fails
    is the candidate unescaped and passed through `repair_json`.

    Args:
        text: Arbitrary text, typically raw LLM output.

    Returns:
        The array re-serialised with `ensure_ascii=False, indent=2`, or a
        human-readable message ("No JSON array found." /
        "Failed to extract JSON: ...") when nothing usable was found. The
        function never raises, because it is exposed to agents as a tool.
    """
    try:
        start = text.find('[')
        end = text.rfind(']') + 1
        if start == -1 or end <= start:
            return "No JSON array found."
        json_str = text[start:end]
        try:
            # Fast path: already valid JSON, nothing to repair.
            parsed = json.loads(json_str)
        except json.JSONDecodeError:
            # Resolve literal backslash escapes without corrupting non-ASCII
            # characters: anything outside latin-1 is first turned into a
            # \uXXXX escape, which unicode_escape then decodes back.
            unescaped = json_str.encode('latin-1', 'backslashreplace').decode('unicode_escape')
            repaired_string = repair_json(unescaped)
            try:
                parsed = json.loads(repaired_string)
            except json.JSONDecodeError:
                # Last resort: drop trailing commas before a closing bracket/brace.
                cleaned = re.sub(r',\s*([\]}])', r'\1', repaired_string)
                parsed = json.loads(repair_json(cleaned))
        return json.dumps(parsed, ensure_ascii=False, indent=2)
    except Exception as e:
        return f"Failed to extract JSON: {str(e)}"


# Tool list passed to every agent built with initialize_agent in this notebook:
# a single tool that wraps extract_json.
tools = [
    Tool(
        name="check json format",
        func=extract_json,
        description="Use this tool when you receive a string that may contain a JSON array and you need to extract or validate it."
    )
]

In [None]:
# Build the agents
def _make_react_agent(llm):
    """Wrap `llm` in a zero-shot ReAct agent that uses the module-level `tools` list."""
    return initialize_agent(
        tools=tools,
        llm=llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
        handle_parsing_errors=True,
    )


# One agent per local model; all three share the same tools and settings.
agent_llama3_8B = _make_react_agent(fst_llm)
agent_mistral_7B = _make_react_agent(sec_llm)
agent_phi3_3dot8B = _make_react_agent(third_llm)

In [None]:

# Generate the modified .js file
def write_modified_js(original_path: str, original_code: str, approved_changes: List[dict]):
    """Apply approved replacements to the source text and write a sibling file.

    The output is written next to `original_path` as
    `<stem>_modified<suffix>`, encoded as UTF-8.

    Args:
        original_path: Path of the file the code was read from; only used to
            derive the output file name.
        original_code: Full text of the original file.
        approved_changes: Dicts carrying "original" (text to find) and
            "replacement" (text to substitute). Every occurrence of
            "original" is replaced.

    Returns:
        The `Path` of the file that was written.
    """
    modified_code = original_code
    for item in approved_changes:
        target = item.get("original")
        substitute = item.get("replacement")
        # Skip unusable entries. In particular, an empty search string would
        # make str.replace insert the replacement between every character.
        if not isinstance(target, str) or not target or not isinstance(substitute, str):
            continue
        modified_code = modified_code.replace(target, substitute)

    p = Path(original_path)
    output_path = p.with_name(f"{p.stem}_modified{p.suffix}")
    # newline="" writes line endings exactly as they are in the text, so CRLF
    # sources are not turned into "\r\r\n" on Windows.
    with open(output_path, "w", encoding="utf-8", newline="") as f:
        f.write(modified_code)
    return output_path

# Call the LLM
def call_llm(llm, prompt):
    """Invoke `llm` with `prompt` and return the reply's text.

    If the reply object has a `content` attribute, that attribute is returned
    as-is; otherwise the reply is converted with `str()`.
    """
    reply = llm.invoke(prompt)
    if hasattr(reply, "content"):
        return reply.content
    return str(reply)


# ====== decoder ======
def read_text_file_safely(path: str) -> str:
    """Read a file as text, trying several encodings in turn.

    The encoding reported by `chardet.detect` is tried first (UTF-8 if chardet
    reports none), then GB2312, then a fixed list of other encodings. The
    first one that decodes the whole file wins.

    Args:
        path: Path of the file to read.

    Returns:
        The decoded file contents.

    Raises:
        UnicodeDecodeError: If none of the candidate encodings can decode the file.
        OSError: If the file cannot be opened or read.
    """
    with open(path, "rb") as f:
        raw_data = f.read()

    detected = chardet.detect(raw_data)
    candidates = [
        detected["encoding"] or "utf-8",
        "GB2312",
        "utf-8", "big5", "cp950", "gbk", "gb18030", "utf-16", "windows-1252",
    ]

    # De-duplicate case-insensitively while keeping the priority order, so a
    # detected "UTF-8" does not cause "utf-8" to be tried a second time.
    fallback_encodings = []
    seen = set()
    for enc in candidates:
        key = enc.lower()
        if key not in seen:
            seen.add(key)
            fallback_encodings.append(enc)

    for enc in fallback_encodings:
        try:
            print(f"嘗試使用編碼decode: {enc}")
            return raw_data.decode(enc)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the detected name is not a codec Python knows.
            continue

    # UnicodeDecodeError takes (encoding, object, start, end, reason); passing
    # only a message would raise TypeError instead of the intended error.
    raise UnicodeDecodeError(
        fallback_encodings[-1],
        raw_data,
        0,
        len(raw_data),
        f"所有常見編碼皆無法解碼檔案: {path}",
    )


In [None]:
# ====== Main LLM Logic ======
def code_rewrite_proposal(code: str) -> str:
    """Ask the first agent for rewrite proposals for `code`.

    Fills `fst_prompt` with the code, runs it through `agent_llama3_8B`
    (the tool-enabled agent, used instead of a direct `call_llm`), prints the
    agent's answer and returns it unchanged.
    """
    # Agent 1: propose rewrites
    proposal_prompt = fst_prompt.format(code=code)
    agent_answer = agent_llama3_8B.run(proposal_prompt)
    print(agent_answer)
    return agent_answer
    # proposals = extract_json(fst_response)

    # if not proposals:
    #     return {"status": "no-change", "original": code}

    # approved_results = []

    # for i, p in enumerate(proposals):
    #     print(f"\nProcessing proposal {i+1}: {p}")
    #     # 檢查關鍵鍵是否存在
    #     if not all(k in p for k in ["line", "original", "issue", "replacement", "reason"]):
    #         print(f"Skipping proposal {i+1} due to missing required keys: {p}")
    #         continue

    #     # Agent 2：語法審查
    #     sec_input = sec_prompt.format(proposal=json.dumps([p], ensure_ascii=False))
    #     # sec_response = call_llm(sec_llm, sec_input)
    #     sec_response = agent_mistral_7B.run(sec_input)
    #     sec_result = extract_json(sec_response)
    #     print("sec_result:", sec_result)
        

    #     # Agent 3：語意一致性
    #     third_input = third_prompt.format(
    #         original=p["original"],
    #         rewritten=p["replacement"]
    #     )
    #     # third_response = call_llm(third_llm, third_input)
    #     third_response = agent_phi3_3dot8B.run(third_input)
    #     third_result = extract_json(third_response)
    #     print("third_result:", third_result)

    #     # 投票機制
    #     vote = (
    #         (sec_result and sec_result[0]["vote"] == "approve")
    #         and (third_result and third_result[0]["vote"] == "approve")
    #     )

    #     if vote:
    #         approved_results.append(p)

    # return {"status": "done", "approved": approved_results}

def code_linting_report(proposals: str) -> List:
    """Send each rewrite proposal to the second agent and collect its answers.

    Args:
        proposals: JSON text holding an array of proposal objects. A single
            proposal object is also accepted and treated as a one-element array.

    Returns:
        A list with one agent response per valid proposal. If `proposals` is
        empty, a one-element list with status "no-change" is returned; if it
        is not valid JSON or is neither an array nor an object, a one-element
        list with status "invalid-json" is returned.
    """
    if not proposals:
        return [{"status": "no-change", "message": ""}]

    try:
        proposals_list = json.loads(proposals)
    except json.JSONDecodeError as e:
        print(f"Invalid JSON input: {e}")
        return [{"status": "invalid-json", "message": str(e)}]

    # Valid JSON is not necessarily an array: accept a lone object, reject
    # scalars instead of failing later while iterating.
    if isinstance(proposals_list, dict):
        proposals_list = [proposals_list]
    elif not isinstance(proposals_list, list):
        message = f"Expected a JSON array of proposals, got {type(proposals_list).__name__}"
        print(f"Invalid JSON input: {message}")
        return [{"status": "invalid-json", "message": message}]

    required_keys = ("line", "original", "issue", "replacement", "reason")
    sec_response_list = []

    for i, p in enumerate(proposals_list):
        print(f"\nProcessing proposal {i+1}: {p}")
        # Check that the item is an object and carries all the required keys.
        if not isinstance(p, dict) or not all(k in p for k in required_keys):
            print(f"Skipping proposal {i+1} due to missing required keys: {p}")
            continue

        sec_input = sec_prompt.format(proposal=json.dumps([p], ensure_ascii=False))
        sec_response = agent_mistral_7B.run(sec_input)
        print(sec_response)
        sec_response_list.append(sec_response)

    return sec_response_list

def extract_function_blocks(js_code: str) -> List[str]:
    """Split JavaScript source into its top-level `function ... { ... }` blocks.

    A block starts at the last `function` keyword seen while outside all
    braces and ends at the brace that returns the nesting depth to zero.
    Nested functions stay inside their enclosing block. The keyword is only
    recognised as a whole word, so identifiers such as `functionTable` or
    `myfunction` do not start a block.

    Braces are counted textually; braces inside string literals, comments or
    regular expressions are not treated specially.

    Args:
        js_code: JavaScript source text.

    Returns:
        The stripped text of each block, in source order.
    """
    keyword = 'function'
    keyword_len = len(keyword)

    def _is_identifier_char(ch: str) -> bool:
        # Characters that may appear inside a JavaScript identifier.
        return ch.isalnum() or ch in '_$'

    blocks = []
    depth = 0  # current brace nesting level
    start = None
    for i, ch in enumerate(js_code):
        if ch == 'f' and depth == 0 and js_code.startswith(keyword, i):
            after = i + keyword_len
            standalone_before = i == 0 or not _is_identifier_char(js_code[i - 1])
            standalone_after = after >= len(js_code) or not _is_identifier_char(js_code[after])
            if standalone_before and standalone_after:
                start = i
        if ch == '{':
            depth += 1
        elif ch == '}':
            # Ignore stray closing braces instead of going negative.
            if depth:
                depth -= 1
                if depth == 0 and start is not None:
                    blocks.append(js_code[start:i + 1].strip())
                    start = None
    return blocks


In [None]:
# Single .js file, for testing
def run_first_js_review(folder_path: str):
    """Run the propose-then-lint pipeline on one file and print both results.

    Despite its name, `folder_path` is handed straight to
    `read_text_file_safely`, i.e. it is used as the path of a single file.
    """
    source_text = read_text_file_safely(folder_path)
    # Splitting the source with extract_function_blocks is currently disabled.

    proposals = code_rewrite_proposal(source_text)
    print("proposal_str", proposals)

    lint_results = code_linting_report(proposals)
    print("linting_str", lint_results)


    # if result["status"] == "done":
    #     print(f"修改完畢如下")
    #     print(result["approved"])
    # else:
    #     print(f"無需修改")

# Run the review pipeline on the file configured in MASTER_JS.
run_first_js_review(MASTER_JS)

### Possible improvements ###

1. 提示工程: few-shot、many-shot 的內容收斂到單一項目(e.g. document.all)
2. 提示工程: Chain-of-Thought、Buffer of Thought
3. 提示工程: 更精確的提示詞、role-playing

目前是使用LLMs Ensemble 中類似 Role-based Multi-Agent 架構(但不是真的Agent)，所以這個方向有:

4. Agent: 根據文章 https://arxiv.org/pdf/2304.03442 建立架構，記錄成功的記憶(memory)
5. Agent: MCP https://ihower.tw/presentation/ihower-MCP-2025-05-23.pdf?fbclid=IwQ0xDSwKfVtdleHRuA2FlbQIxMQABHpHnLaqK2X9AmlPvZO0bxqlCfWfCa3UUJV6VEPKdkzzqOsKRLHGSClKi7bV0_aem_HBd1VtOjMNWnJVOLBG0L6Q