In [1]:
import requests
import json
import pandas as pd
from datetime import datetime

BASE_URL = "https://clinicaltrials.gov/api/v2/studies"

def fetch_full_trial(query_term="diabetes"):
    """Fetch one page of studies matching ``query_term`` from ``BASE_URL``.

    The request asks for a page size of 5 and only that single page is read.

    Args:
        query_term: Free-text search term sent as the ``query.term``
            parameter. It is URL-encoded by ``requests``, so it may contain
            spaces or reserved characters such as ``&`` and ``#``.

    Returns:
        The list stored under the ``"studies"`` key of the JSON response, or
        an empty list when the HTTP status is not 200 or no studies are
        returned.

    Raises:
        requests.RequestException: On connection failures or when the server
            does not respond within the timeout.
    """
    # Pass parameters separately instead of formatting them into the URL so
    # that reserved characters in the search term cannot break the query string.
    params = {"query.term": query_term, "pageSize": 5}
    print("🔍 正在抓取完整試驗資料...")
    # A timeout keeps a stalled connection from blocking the script forever.
    response = requests.get(BASE_URL, params=params, timeout=30)
    if response.status_code != 200:
        print(f"❌ 錯誤：{response.status_code} - {response.text}")
        return []
    studies = response.json().get("studies", [])
    if not studies:
        print("⚠️ 沒有資料")
        return []
    return studies

def flatten_dict(d, parent_key='', sep='.'):
    """Collapse a nested dict into a single-level dict with path-style keys.

    Nested dict keys are joined with ``sep``; list elements get an ``[i]``
    suffix on the key of the list that holds them. Dicts found directly
    inside a list are flattened further, while any other list element
    (including a nested list) is stored as-is. Empty dicts and empty lists
    contribute no entries.

    Args:
        d: The dict to flatten.
        parent_key: Prefix prepended to every produced key; when falsy, no
            prefix (and no separator) is added.
        sep: Separator placed between a prefix and a child key.

    Returns:
        A new flat dict, in depth-first traversal order of ``d``.
    """
    def _pairs(mapping, prefix):
        # Yield (flat_key, value) pairs for everything reachable from mapping.
        for name, value in mapping.items():
            path = f"{prefix}{sep}{name}" if prefix else name
            if isinstance(value, dict):
                yield from _pairs(value, path)
            elif isinstance(value, list):
                for position, element in enumerate(value):
                    slot = f"{path}[{position}]"
                    if isinstance(element, dict):
                        yield from _pairs(element, slot)
                    else:
                        yield slot, element
            else:
                yield path, value

    return dict(_pairs(d, parent_key))

def reorder_columns(df):
    """Return ``df`` with its columns grouped by top-level section prefix.

    Columns are arranged in this order: those starting with
    ``protocolSection``, ``derivedSection``, ``hasResult``,
    ``documentSection``, ``resultsSection``, then every remaining column.
    Within each group the original relative order is kept.

    Args:
        df: DataFrame whose columns should be regrouped.

    Returns:
        The result of selecting the regrouped column list from ``df``.
    """
    prefixes = (
        "protocolSection",
        "derivedSection",
        "hasResult",
        "documentSection",
        "resultsSection",
    )
    # Bucket every column in one pass; the previous implementation rebuilt a
    # concatenated list and scanned it for each column, which is quadratic in
    # the number of columns.
    grouped = {prefix: [] for prefix in prefixes}
    other_cols = []
    for col in df.columns:
        for prefix in prefixes:
            # Non-string labels cannot carry a section prefix, so they fall
            # through to the trailing group instead of raising.
            if isinstance(col, str) and col.startswith(prefix):
                grouped[prefix].append(col)
                break
        else:
            other_cols.append(col)

    ordered = [col for prefix in prefixes for col in grouped[prefix]]
    return df[ordered + other_cols]

if __name__ == "__main__":
    # Script entry point: download the studies, keep a raw JSON snapshot,
    # then write a flattened, column-ordered CSV. Both file names carry
    # today's date as YYYYMMDD. Nothing is written when no studies come back.
    trials = fetch_full_trial(query_term="diabetes")
    if trials:
        today_str = datetime.today().strftime("%Y%m%d")
        json_path = f"full_trials_{today_str}.json"
        csv_path = f"all_trials_{today_str}.csv"

        # Raw snapshot; ensure_ascii=False keeps non-ASCII text readable.
        with open(json_path, "w", encoding="utf-8") as raw_file:
            json.dump(trials, raw_file, ensure_ascii=False, indent=2)

        # One flattened row per study.
        rows = [flatten_dict(study) for study in trials]
        table = reorder_columns(pd.DataFrame(rows))

        table.to_csv(csv_path, index=False)
        print(f"✅ 已輸出到 all_trials_{today_str}.csv")

🔍 正在抓取完整試驗資料...
✅ 已輸出到 all_trials_20250730.csv
