[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/0x-yuan/clintrial-nlp/blob/main/langchain_baseline.ipynb)

# 簡化流程推理框架基線 - 臨床試驗 NLP

## 概述

本 notebook 展示如何使用簡單的步驟式推理流程，進行臨床試驗自然語言推理（NLI）。我們模擬「流程化思考」的概念，透過結構化的分析步驟逐步得出結論。

## 📚 學習目標
- 理解步驟式推理的重要性
- 建立結構化的分析流程
- 實作循序漸進的決策過程
- 評估系統效能

### 🔄 流程推理架構
我們實作五個連續的分析步驟：
1. **資料提取步驟**: 從試驗中提取相關資訊
2. **醫學分析步驟**: 從醫學角度分析陳述
3. **統計分析步驟**: 驗證數據和計算
4. **邏輯驗證步驟**: 檢查推理邏輯
5. **決策綜合步驟**: 整合所有資訊做出最終決策

> 💡 **核心概念**: 每個步驟都建立在前一步驟的結果之上，形成完整的推理鏈。

In [None]:
# Environment setup: install the third-party packages this notebook uses.
# The leading "!" is IPython/Colab shell syntax, so this cell only runs inside a notebook.
!pip install -q google-generativeai python-dotenv pandas tqdm gdown

# NOTE: printed unconditionally; pip's exit status is not checked here.
print("✅ 所有套件安裝完成！")

In [None]:
# 📥 從 Google Drive 下載訓練資料
import os
import gdown
import zipfile
import shutil

# Google Drive file ID of the zipped dataset; both URLs below are derived from it.
file_id = "15GA5XI39DDxQ5QkIZXsFbApx1yEvCpcR"
zip_url = f"https://drive.google.com/uc?id={file_id}"
zip_filename = "clinicaltrial-nlp.zip"

if not os.path.exists("training_data"):
    print("📥 從 Google Drive 下載 clinicaltrial-nlp.zip...")
    try:
        gdown.download(zip_url, zip_filename, quiet=False)

        print("📦 正在解壓縮檔案...")
        with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
            zip_ref.extractall(".")

        # The archive may wrap the data in a "clintrial-nlp/" folder; if so,
        # lift training_data up to the working directory and drop the wrapper.
        if os.path.exists("clintrial-nlp/training_data"):
            shutil.move("clintrial-nlp/training_data", "training_data")
            if os.path.exists("clintrial-nlp"):
                shutil.rmtree("clintrial-nlp")

        # Only claim success when the directory later cells rely on really exists.
        if not os.path.isdir("training_data"):
            raise FileNotFoundError("壓縮檔中找不到 training_data 資料夾")

        print("✅ 訓練資料下載並解壓縮完成！")

    except Exception as e:
        # Best-effort cell: report the problem and point to the manual download
        # instead of aborting the notebook.
        print(f"❌ 下載失敗: {e}")
        print(f"請手動下載: https://drive.google.com/file/d/{file_id}/view")
    finally:
        # Remove the archive whether or not extraction succeeded, so a failed
        # run does not leave a partial zip behind.
        if os.path.exists(zip_filename):
            os.remove(zip_filename)
else:
    print("✅ 訓練資料已存在，跳過下載")

# Sanity check: count the clinical-trial JSON files that are now available.
if os.path.exists("training_data/CT json"):
    ct_files = sum(f.endswith('.json') for f in os.listdir("training_data/CT json"))
    print(f"📄 找到 {ct_files} 個臨床試驗JSON檔案")

In [None]:
# 🧪 準備測試資料集
import json

def create_test_data_if_needed():
    """Create ``test.json`` from the first 100 training examples if it is missing.

    When ``test.json`` already exists in the working directory only a notice is
    printed. Otherwise ``training_data/train.json`` is read and its first 100
    entries are written to ``test.json``. Any failure along the way is reported
    on stdout rather than raised.
    """
    if os.path.exists("test.json"):
        print("✅ test.json 已存在")
        return

    try:
        with open("training_data/train.json", "r", encoding="utf-8") as source:
            all_examples = json.load(source)

        # Slicing the item list keeps the file's own ordering of entries.
        first_items = list(all_examples.items())[:100]
        subset = dict(first_items)

        with open("test.json", "w", encoding="utf-8") as target:
            json.dump(subset, target, indent=2, ensure_ascii=False)
        print(f"✅ 已創建測試資料集，包含 {len(subset)} 個樣本")
    except Exception as err:
        print(f"❌ 創建測試資料失敗: {err}")

create_test_data_if_needed()

In [None]:
# Load environment variables and import the libraries used by the rest of the notebook
from dotenv import load_dotenv
import os
import json
import pandas as pd
from tqdm import tqdm
import google.generativeai as genai
import time
import warnings
# Silence all Python warnings for the remainder of the session
warnings.filterwarnings('ignore')

# Read variables from a .env file into the process environment (python-dotenv)
load_dotenv()
print("✅ 環境變數載入完成")

## 模型配置

配置Google Gemini模型進行推理：

In [None]:
# google.colab only exists inside Colab; keep the notebook runnable elsewhere
# (e.g. locally with a .env file) by treating it as optional.
try:
    from google.colab import userdata
except ImportError:
    userdata = None

# Resolve the API key: environment variables first, then Colab's secret store.
# Both common names are accepted so the message below matches what is checked.
api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")

if not api_key and userdata is not None:
    for secret_name in ("GOOGLE_API_KEY", "GEMINI_API_KEY"):
        try:
            api_key = userdata.get(secret_name)
        except Exception:
            # Colab raises (rather than returning None) when the secret is
            # missing or notebook access has not been granted; try the next name.
            api_key = None
        if api_key:
            break

if not api_key:
    print("⚠️ 請設定 GOOGLE_API_KEY 環境變數")
    print("可以在 Colab 左側面板的 'Secrets' 中設定")
    raise ValueError("缺少 API 金鑰")
else:
    # Never echo any part of the key: notebook outputs get saved and shared.
    print("✅ 找到 API 金鑰")

genai.configure(api_key=api_key)

# Smoke-test the API connection before building the real model instance
try:
    test_model = genai.GenerativeModel("gemini-2.5-flash")
    test_response = test_model.generate_content("Hello, respond with 'API test successful'")
    print(f"✅ API 連接測試成功: {test_response.text[:50]}...")
except Exception as e:
    print(f"❌ API 連接測試失敗: {e}")
    raise

# Gemini model instance shared by every reasoning step below.
# Low temperature with top_k=1 keeps the outputs close to deterministic.
model = genai.GenerativeModel(
    model_name="gemini-2.5-flash",
    generation_config=genai.types.GenerationConfig(
        temperature=0.1,
        max_output_tokens=4096,
        top_p=1,
        top_k=1
    )
)

print("✅ Google Gemini模型配置完成")

## 資料工具函式

建立用於載入和處理臨床試驗資料的工具函式：

In [None]:
def load_clinical_trial(trial_id: str) -> dict:
    """Read one clinical-trial record from ``training_data/CT json/<trial_id>.json``.

    Args:
        trial_id: File stem of the trial to load.

    Returns:
        The parsed JSON content. On failure a dict with a single ``"error"``
        key is returned instead of raising: one message for a missing file,
        another (including the exception text) for any other problem.
    """
    try:
        trial_path = os.path.join("training_data", "CT json", f"{trial_id}.json")
        with open(trial_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except FileNotFoundError:
        failure = f"找不到臨床試驗 {trial_id}"
    except Exception as exc:
        failure = f"載入 {trial_id} 時發生錯誤: {str(exc)}"
    return {"error": failure}

def format_trial_context(trial_data: dict, focus_section: str = None) -> str:
    """Render a trial record as a compact text block for use in prompts.

    Args:
        trial_data: Trial dict as returned by ``load_clinical_trial``. A dict
            containing an ``"error"`` key is rendered as a one-line error.
        focus_section: Optional section name ("Eligibility", "Intervention",
            "Results" or "Adverse Events"). When it names a known section only
            that section is rendered (up to 5 items); otherwise every
            non-empty section is rendered (up to 3 items each).

    Returns:
        Newline-joined text starting with the trial ID line.
    """
    if "error" in trial_data:
        return f"錯誤: {trial_data['error']}"

    # The section is exposed here as "Adverse Events" (with a space), which is
    # also how NLI4CT trial files spell the key; the previously used
    # "Adverse_Events" spelling is kept as a fallback so both layouts work.
    adverse_events = trial_data.get("Adverse Events")
    if adverse_events is None:
        adverse_events = trial_data.get("Adverse_Events", [])

    sections = {
        "Eligibility": trial_data.get("Eligibility", []),
        "Intervention": trial_data.get("Intervention", []),
        "Results": trial_data.get("Results", []),
        "Adverse Events": adverse_events,
    }

    def render(section_data, limit):
        """Return the indented lines for one section, truncated to ``limit`` items."""
        if not isinstance(section_data, list):
            return [f"  {section_data}"]
        lines = [f"  - {item}" for item in section_data[:limit]]
        if len(section_data) > limit:
            # Tell the reader (and the model) how much was cut off.
            lines.append(f"  ... (還有 {len(section_data)-limit} 項)")
        return lines

    result = [f"試驗ID: {trial_data.get('Clinical Trial ID', 'Unknown')}"]

    if focus_section and focus_section in sections:
        # Focused view: show the requested section even if it is empty.
        result.append(f"\n{focus_section} 區段:")
        result.extend(render(sections[focus_section], 5))
    else:
        # Overview: skip empty sections and keep each one short.
        for section_name, section_data in sections.items():
            if section_data:
                result.append(f"\n{section_name}:")
                result.extend(render(section_data, 3))

    return "\n".join(result)

# Smoke-test the helpers: load one sample trial and show its ID.
# If loading failed, the returned dict has no 'Clinical Trial ID' key and '錯誤' is printed instead.
sample_trial = load_clinical_trial("NCT00066573")
print(f"✅ 資料工具函式準備就緒。範例試驗: {sample_trial.get('Clinical Trial ID', '錯誤')}")

## 流程推理步驟

定義五個連續的分析步驟：

In [None]:
def step1_data_extraction(statement: str, trial_context: str) -> str:
    """Step 1: ask the model to extract trial facts relevant to the statement.

    Args:
        statement: The claim being checked.
        trial_context: Formatted trial text to extract from.

    Returns:
        The model's reply text, or a string starting with ``"資料提取錯誤: "``
        followed by the exception message if the call failed.
    """
    instructions = f"""你是資料提取專家。你的任務是從臨床試驗資料中提取與陳述相關的關鍵資訊。

陳述: "{statement}"

試驗資料:
{trial_context}

請提取以下資訊：
1. 與陳述相關的關鍵數據
2. 相關的試驗結果
3. 重要的統計數字
4. 任何相關的醫學概念

請以結構化方式列出提取的資訊，最後以「提取完成」結尾。"""

    try:
        # Uses the module-level `model`
        reply = model.generate_content(instructions)
        extracted = reply.text
    except Exception as exc:
        # Failures are reported in-band as text instead of being raised.
        return f"資料提取錯誤: {exc!s}"
    return extracted

def step2_medical_analysis(statement: str, extracted_data: str) -> str:
    """Step 2: ask the model to judge the statement's medical accuracy.

    Args:
        statement: The claim being checked.
        extracted_data: Text produced by the data-extraction step.

    Returns:
        The model's reply text, or a string starting with ``"醫學分析錯誤: "``
        followed by the exception message if the call failed.
    """
    instructions = f"""你是醫學專家。基於提取的資料，分析陳述的醫學準確性。

陳述: "{statement}"

提取的資料:
{extracted_data}

請分析：
1. 醫學術語是否正確使用
2. 臨床概念是否合理
3. 陳述是否符合醫學邏輯
4. 與試驗資料的醫學一致性

最後以「醫學分析: [支持/反駁/不確定]」結尾。"""

    try:
        # Uses the module-level `model`
        reply = model.generate_content(instructions)
        analysis = reply.text
    except Exception as exc:
        # Failures are reported in-band as text instead of being raised.
        return f"醫學分析錯誤: {exc!s}"
    return analysis

def step3_statistical_analysis(statement: str, extracted_data: str, medical_analysis: str) -> str:
    """Step 3: ask the model to verify the numbers and statistics in the statement.

    Args:
        statement: The claim being checked.
        extracted_data: Text produced by the data-extraction step.
        medical_analysis: Text produced by the medical-analysis step.

    Returns:
        The model's reply text, or a string starting with ``"統計分析錯誤: "``
        followed by the exception message if the call failed.
    """
    instructions = f"""你是統計分析專家。基於前面的分析，驗證陳述中的數值和統計資訊。

陳述: "{statement}"

提取的資料:
{extracted_data}

醫學分析:
{medical_analysis}

請分析：
1. 數值計算是否正確
2. 統計方法是否適當
3. 百分比和比率是否準確
4. 數據解釋是否合理

最後以「統計分析: [正確/錯誤/部分正確]」結尾。"""

    try:
        # Uses the module-level `model`
        reply = model.generate_content(instructions)
        analysis = reply.text
    except Exception as exc:
        # Failures are reported in-band as text instead of being raised.
        return f"統計分析錯誤: {exc!s}"
    return analysis

def step4_logic_verification(statement: str, medical_analysis: str, statistical_analysis: str) -> str:
    """Step 4: ask the model to check the logical consistency of the statement.

    Args:
        statement: The claim being checked.
        medical_analysis: Text produced by the medical-analysis step.
        statistical_analysis: Text produced by the statistical-analysis step.

    Returns:
        The model's reply text, or a string starting with ``"邏輯驗證錯誤: "``
        followed by the exception message if the call failed.
    """
    instructions = f"""你是邏輯驗證專家。基於前面的分析，檢查陳述的邏輯一致性。

陳述: "{statement}"

醫學分析:
{medical_analysis}

統計分析:
{statistical_analysis}

請驗證：
1. 推理邏輯是否合理
2. 因果關係是否正確
3. 結論是否從證據得出
4. 是否存在邏輯矛盾

最後以「邏輯驗證: [合理/不合理/有疑慮]」結尾。"""

    try:
        # Uses the module-level `model`
        reply = model.generate_content(instructions)
        verification = reply.text
    except Exception as exc:
        # Failures are reported in-band as text instead of being raised.
        return f"邏輯驗證錯誤: {exc!s}"
    return verification

def step5_decision_synthesis(statement: str, data_extraction: str, medical_analysis: str, 
                           statistical_analysis: str, logic_verification: str) -> str:
    """Step 5: ask the model for the final entailment verdict.

    Args:
        statement: The claim being checked.
        data_extraction: Text produced by step 1.
        medical_analysis: Text produced by step 2.
        statistical_analysis: Text produced by step 3.
        logic_verification: Text produced by step 4.

    Returns:
        The model's reply text (the prompt asks it to end with a
        ``最終決策: ...`` line), or a string starting with ``"決策綜合錯誤: "``
        followed by the exception message if the call failed.
    """
    instructions = f"""你是決策綜合專家。基於所有前面的分析步驟，做出最終的蘊含判斷。

陳述: "{statement}"

資料提取:
{data_extraction}

醫學分析:
{medical_analysis}

統計分析:
{statistical_analysis}

邏輯驗證:
{logic_verification}

任務: 判斷陳述是「蘊含」(Entailment)還是「矛盾」(Contradiction)
- 蘊含: 陳述被試驗證據支持
- 矛盾: 陳述被試驗證據反駁

請綜合所有步驟的分析，提供簡要理由，然後以「最終決策: [Entailment/Contradiction]」結尾。"""

    try:
        # Uses the module-level `model`
        reply = model.generate_content(instructions)
        verdict_text = reply.text
    except Exception as exc:
        # Failures are reported in-band as text instead of being raised.
        return f"決策綜合錯誤: {exc!s}"
    return verdict_text

print("✅ 五個流程推理步驟定義完成")

## 流程推理管道

創建執行所有步驟的完整分析管道：

In [None]:
def _parse_final_decision(final_text: str) -> str:
    """Map the free-text output of step 5 to "Entailment" or "Contradiction"."""
    import re  # local import so this cell does not depend on another cell's imports

    label_map = {
        "entailment": "Entailment",
        "contradiction": "Contradiction",
        "蘊含": "Entailment",
        "矛盾": "Contradiction",
    }
    # Tolerate half/full-width colons, markdown bold, brackets and quotes around
    # the verdict. The negative lookahead rejects an echoed prompt template such
    # as "最終決策: [Entailment/Contradiction]", which is not a verdict.
    pattern = r'最終決策[\s*]*[:：][\s*\[「"`]*(entailment|contradiction|蘊含|矛盾)(?!\s*/)'
    verdicts = re.findall(pattern, final_text, flags=re.IGNORECASE)
    if verdicts:
        # The prompt asks for the verdict at the end, so the last match wins.
        return label_map[verdicts[-1].lower()]

    # No labelled verdict: accept Entailment only when it is the sole label
    # mentioned anywhere; otherwise stay conservative.
    lowered = final_text.lower()
    if "entailment" in lowered and "contradiction" not in lowered:
        return "Entailment"
    return "Contradiction"


def workflow_reasoning_pipeline(statement: str, primary_id: str, secondary_id: str = None, 
                              section_id: str = None, verbose: bool = False) -> str:
    """Run the five reasoning steps for one statement and return the verdict.

    Args:
        statement: The claim to check.
        primary_id: ID of the primary trial to load.
        secondary_id: Optional ID of a second trial whose context is appended.
        section_id: Optional section name passed to ``format_trial_context``.
        verbose: When true, print progress after each step.

    Returns:
        ``"Entailment"`` or ``"Contradiction"``. Any exception raised inside
        the pipeline results in ``"Contradiction"`` rather than propagating.
    """

    try:
        # Build the trial context (primary trial, plus the secondary one if given)
        primary_data = load_clinical_trial(primary_id)
        trial_context = format_trial_context(primary_data, section_id)

        if secondary_id:
            secondary_data = load_clinical_trial(secondary_id)
            secondary_context = format_trial_context(secondary_data, section_id)
            trial_context += f"\n\n次要試驗:\n{secondary_context}"

        if verbose:
            print(f"📄 分析陳述: {statement[:100]}...")
            print(f"🏥 主要試驗: {primary_id}")
            if secondary_id:
                print(f"🏥 次要試驗: {secondary_id}")

        # Step 1: data extraction
        data_extraction = step1_data_extraction(statement, trial_context)
        if verbose:
            print("📊 步驟1: 資料提取完成")

        # Step 2: medical analysis
        medical_analysis = step2_medical_analysis(statement, data_extraction)
        if verbose:
            print("🩺 步驟2: 醫學分析完成")

        # Step 3: statistical analysis
        statistical_analysis = step3_statistical_analysis(statement, data_extraction, medical_analysis)
        if verbose:
            print("📈 步驟3: 統計分析完成")

        # Step 4: logic verification
        logic_verification = step4_logic_verification(statement, medical_analysis, statistical_analysis)
        if verbose:
            print("🧠 步驟4: 邏輯驗證完成")

        # Step 5: decision synthesis
        final_decision = step5_decision_synthesis(
            statement, data_extraction, medical_analysis, 
            statistical_analysis, logic_verification
        )

        # Turn the model's free text into one of the two labels
        decision = _parse_final_decision(final_decision)

        if verbose:
            print(f"⚖️ 步驟5: 最終決策 - {decision}")
            print("-" * 50)

        return decision

    except Exception as e:
        if verbose:
            print(f"❌ 管道錯誤: {e}")
        return "Contradiction"  # conservative fallback

print("✅ 流程推理分析管道準備就緒")

## 測試範例

測試我們的流程推理系統：

In [None]:
# Demo: run the full pipeline once on a single hand-picked statement
test_statement = "there is a 13.2% difference between the results from the two the primary trial cohorts"
test_primary_id = "NCT00066573"

print(f"測試流程推理系統:")
print(f"陳述: '{test_statement}'")
print(f"主要試驗: {test_primary_id}")
print("\n" + "="*80)

# Run the analysis on the "Results" section with progress output, timing the whole call
start_time = time.time()
result = workflow_reasoning_pipeline(
    statement=test_statement,
    primary_id=test_primary_id,
    section_id="Results",
    verbose=True
)
end_time = time.time()

print(f"\n🎯 流程推理結果: {result}")
print(f"⏱️ 執行時間: {end_time - start_time:.2f} 秒")
print("="*80)

## 在訓練資料上評估

在訓練資料樣本上評估我們的系統：

In [None]:
# Load the training data
with open("training_data/train.json", "r", encoding="utf-8") as f:
    train_data = json.load(f)
print(f"載入 {len(train_data)} 個訓練範例")

# Evaluate on the first `sample_size` examples only (each one costs five model calls)
sample_size = 15
examples = list(train_data.items())[:sample_size]

print(f"\n在 {len(examples)} 個範例上評估流程推理系統...")

results = []
correct = 0
total_time = 0
timed_examples = 0  # examples for which the pipeline actually ran to completion

for i, (uuid, example) in enumerate(tqdm(examples, desc="流程推理處理")):
    # Bound before the try so the except handler can always reference it
    expected = None
    try:
        statement = example.get("Statement")
        primary_id = example.get("Primary_id")
        secondary_id = example.get("Secondary_id")
        section_id = example.get("Section_id")
        expected = example.get("Label")

        # Without a statement or a primary trial there is nothing to evaluate
        if not statement or not primary_id:
            results.append({
                "uuid": uuid,
                "expected": expected,
                "predicted": "SKIPPED",
                "correct": False,
                "time": 0
            })
            continue

        # Get the prediction and time it
        start_time = time.time()
        predicted = workflow_reasoning_pipeline(
            statement=statement,
            primary_id=primary_id,
            secondary_id=secondary_id,
            section_id=section_id,
            verbose=False
        )
        end_time = time.time()

        execution_time = end_time - start_time
        total_time += execution_time
        timed_examples += 1

        # A missing label can never match; do not let it raise and mask the prediction
        is_correct = expected is not None and predicted.strip() == expected.strip()
        if is_correct:
            correct += 1

        results.append({
            "uuid": uuid,
            "statement": statement[:80] + "..." if len(statement) > 80 else statement,
            "expected": expected,
            "predicted": predicted,
            "correct": is_correct,
            "time": execution_time
        })

        status = "✅" if is_correct else "❌"
        print(f"範例 {i+1:2d}: {str(expected):12} -> {predicted:12} {status} ({execution_time:.1f}s)")

    except Exception as e:
        print(f"處理範例 {i+1} 時發生錯誤: {e}")
        results.append({
            "uuid": uuid,
            "expected": expected,
            "predicted": "ERROR",
            "correct": False,
            "time": 0
        })

# Accuracy counts skipped/failed examples as wrong; the average time is taken
# only over examples that were actually run, so skips do not deflate it.
accuracy = correct / len(examples) if examples else 0
avg_time = total_time / timed_examples if timed_examples else 0

print(f"\n📊 流程推理系統結果:")
print(f"準確率: {accuracy:.2%} ({correct}/{len(examples)})")
print(f"平均執行時間: {avg_time:.2f} 秒/例")
print(f"總執行時間: {total_time:.2f} 秒")

## 產生提交檔案

使用我們的流程推理系統產生預測結果：

In [None]:
def generate_workflow_submission(test_file="test.json", output_file="workflow_reasoning_submission.json", sample_size=None):
    """Run the reasoning pipeline over a test file and write a submission JSON.

    Args:
        test_file: Path to a JSON object mapping example IDs to example dicts
            (read via the "Statement", "Primary_id", "Secondary_id" and
            "Section_id" keys).
        output_file: Path the submission is written to.
        sample_size: If not None, only the first ``sample_size`` examples are
            processed.

    Returns:
        A dict mapping each example ID to ``{"Prediction": label}``, or None
        when the test file cannot be read or parsed (nothing is written then).
    """

    # Load the test data. Only I/O and parse errors are handled here; a bare
    # `except:` would also swallow KeyboardInterrupt and hide the cause.
    try:
        with open(test_file, "r", encoding="utf-8") as f:
            test_data = json.load(f)
    except (OSError, ValueError) as e:
        print(f"❌ 無法載入測試資料 {test_file}: {e}")
        return

    examples = list(test_data.items())
    # `is not None` so that an explicit sample_size=0 means "no examples"
    # instead of silently processing the whole file.
    if sample_size is not None:
        examples = examples[:sample_size]

    print(f"🚀 為 {len(examples)} 個範例產生流程推理預測...")

    submission = {}

    for i, (uuid, example) in enumerate(tqdm(examples, desc="流程推理處理")):
        try:
            statement = example.get("Statement")
            primary_id = example.get("Primary_id")
            secondary_id = example.get("Secondary_id")
            section_id = example.get("Section_id")

            # Incomplete examples get the conservative default label
            if not statement or not primary_id:
                submission[uuid] = {"Prediction": "Contradiction"}
                continue

            # Get the prediction
            prediction = workflow_reasoning_pipeline(
                statement=statement,
                primary_id=primary_id,
                secondary_id=secondary_id,
                section_id=section_id,
                verbose=False
            )

            submission[uuid] = {"Prediction": prediction}

        except Exception as e:
            # Keep one entry per example even when processing fails
            print(f"處理 {uuid} 時發生錯誤: {e}")
            submission[uuid] = {"Prediction": "Contradiction"}

    # Save the submission file
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(submission, f, indent=2)

    print(f"✅ 流程推理提交檔案已儲存至 {output_file}")
    return submission

# Generate a small sample submission (first 10 examples only)
workflow_submission = generate_workflow_submission(
    test_file="test.json", 
    output_file="workflow_reasoning_submission.json",
    sample_size=10
)

# generate_workflow_submission returns None when the test file cannot be
# loaded (it prints its own error), so only report a count when there is one.
if workflow_submission is not None:
    print(f"為 {len(workflow_submission)} 個範例產生了預測")

## 結論

### 流程推理系統優勢：
1. **結構化思考**: 清晰的五步驟分析流程
2. **循序漸進**: 每步驟建立在前一步基礎上
3. **全面分析**: 涵蓋資料、醫學、統計、邏輯各層面
4. **透明度高**: 每個推理步驟都可追蹤
5. **模組化**: 可以獨立改進每個步驟

### 五步驟推理流程：
1. **資料提取**: 從試驗資料中提取關鍵資訊
2. **醫學分析**: 評估醫學概念和術語的準確性
3. **統計分析**: 驗證數值計算和統計方法
4. **邏輯驗證**: 檢查推理邏輯和因果關係
5. **決策綜合**: 整合所有分析做出最終判斷

### 適用場景：
- 需要詳細推理過程的分析任務
- 複雜的多層次決策問題
- 需要步驟追蹤的品質管控
- 教學和訓練用途

這個流程推理系統展示了如何通過結構化的分析步驟來處理複雜的臨床試驗NLP任務，每個步驟都有明確的目標和輸出，形成完整的推理鏈。