<a href="https://colab.research.google.com/github/RockZeroAxl/Biomedical-Text-Processing/blob/main/Execution_Entry_Point.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from text_utils import TextCleaner, DateParser
from tnm_parser import TNMExtractor
import json

def process_medical_report(
    raw_text: str,
    report_date: str,
    *,
    cleaner=None,
    tnm_engine=None,
    date_parser=None,
) -> dict:
    """
    Run the demo pipeline on a single report and return structured data.

    Pipeline steps:
    1. Clean the raw text.
    2. Normalize the report date.
    3. Extract the key fields (TNM).
    4. Assemble the structured result (the caller serializes it to JSON).

    Args:
        raw_text: Raw report text.
        report_date: Report date string; passed unchanged to
            ``parse_report_date``.
        cleaner: Optional object exposing ``normalize_text(text)``. A new
            ``TextCleaner()`` is created when omitted.
        tnm_engine: Optional object exposing ``extract(text)`` and
            ``validate_logic(result)``. A new ``TNMExtractor()`` is created
            when omitted.
        date_parser: Optional object exposing ``parse_report_date(date)``.
            Defaults to ``DateParser``.

    Returns:
        A dict with the keys ``"report_date"``, ``"cancer_staging"``
        (holding ``"pT"``, ``"pN"`` and ``"pM"``) and
        ``"original_fragment_preview"``.

    Raises:
        KeyError: If the validated TNM result lacks ``'pT'``, ``'pN'`` or
            ``'pM'``.
    """
    # Maximum number of characters of cleaned text echoed back in the output.
    preview_limit = 100

    # 1. Initialize the tools. Callers processing many reports can pass
    #    pre-built instances so they are not re-created on every call.
    if cleaner is None:
        cleaner = TextCleaner()
    if tnm_engine is None:
        tnm_engine = TNMExtractor()
    if date_parser is None:
        date_parser = DateParser

    # 2. Pre-processing
    clean_text = cleaner.normalize_text(raw_text)
    norm_date = date_parser.parse_report_date(report_date)

    print(f"--- Processing Report (Date: {norm_date}) ---")

    # 3. Information extraction
    tnm_result = tnm_engine.extract(clean_text)

    # 4. Logic validation
    final_tnm = tnm_engine.validate_logic(tnm_result)

    # Only mark the preview with an ellipsis when text was actually cut off;
    # an unconditional "..." would wrongly suggest a short report is truncated.
    preview = clean_text[:preview_limit]
    if len(clean_text) > preview_limit:
        preview += "..."

    # 5. Structured output
    return {
        "report_date": norm_date,
        "cancer_staging": {
            "pT": final_tnm['pT'],
            "pN": final_tnm['pN'],
            # Only pM gets a placeholder when falsy; pT/pN are passed
            # through exactly as returned by validate_logic.
            "pM": final_tnm['pM'] or 'Not Mentioned',
        },
        "original_fragment_preview": preview,
    }

if __name__ == "__main__":
    # Demo entry point: runs the pipeline on one hard-coded sample report
    # and prints the structured result.

    # --- Mock data (simulated de-identified pathology report) ---
    sample_report = """
    Pathological diagnosis:
    Left Breast, Modified Radical Mastectomy:
    Invasive ductal carcinoma.
    Tumor size: 2.5 x 2.0 cm.

    Pathologic stage: pT2N1aM0
    1. Tumor invades muscularis propria.
    2. Lymph nodes: 2/12 positive for malignancy.
    """

    sample_date = "1120520" # ROC (Minguo) calendar, year 112; presumably YYYMMDD - confirm against DateParser

    # Run the pipeline
    result = process_medical_report(sample_report, sample_date)

    # Print the result as JSON; ensure_ascii=False keeps any non-ASCII
    # characters readable instead of emitting \uXXXX escapes.
    print(json.dumps(result, indent=4, ensure_ascii=False))