In [1]:
import json
import os
import warnings

import requests
from urllib3.exceptions import InsecureRequestWarning

warnings.simplefilter("ignore", InsecureRequestWarning)

# Default request headers shared by the workflow API wrappers below.
# SECURITY: the fallback token is a credential committed to source. Set the
# NOAH_API_TOKEN environment variable to override it; the literal should be
# rotated and removed once every caller supplies the variable.
HEADERS = {
    "accept": "application/json",
    "content-type": "application/json",
    "authorization": "Token " + (
        os.environ.get("NOAH_API_TOKEN")
        or "b85ee49cde4db576eee2522c6c340e7f967537e2"
    ),
}

def query_clinical_result(
    indication_name=None,
    drug_modality=None,
    locations=None,
    target=None,
    lead_company=None,
    phase=None,
    route_of_administration=None,
    drug_feature=None,
    drug_name=None,
    nctids=None,
    limit=30,
    page=1
):
    """
    Call the clinical-result API with the provided filters.

    Filters that are falsy (None, empty list, empty dict) are left out of the
    request. Bare lists passed for drug_modality, locations, target,
    route_of_administration, drug_feature and drug_name are wrapped as
    {"data": <list>, "logic": "or"}; every other value is sent unchanged.

    Args:
        indication_name (list): List of indication names
        drug_modality (list or dict): Drug modality list or dict with data list and logic (or/and)
        locations (list or dict): Locations list or dict with data list and logic (or/and)
        target (list or dict): Targets list or dict with data list and logic (or/and)
        lead_company (list): List of lead companies
        phase (list): List of clinical phases
        route_of_administration (list or dict): ROA list or dict with data list and logic (or/and)
        drug_feature (list or dict): Drug features list or dict with data list and logic (or/and)
        drug_name (list or dict): Drug names list or dict with data list and logic (or/and)
        nctids (list): List of NCT IDs
        limit (int): Number of results to return (documented max 30; not enforced here)
        page (int): Page number for pagination

    Returns:
        dict or str: The decoded JSON body, or the raw response text when the
        body is not valid JSON. The HTTP status code is not checked.
    """

    clinical_trial_api_url = "https://staging.noahai.co/api/workflow/clinical-result/"

    # Helper function to convert list to dict with "or" logic if needed
    def ensure_dict_format(param):
        if isinstance(param, list):
            return {"data": param, "logic": "or"}
        return param

    # Build filter object with provided parameters
    filter_dict = {}
    if indication_name:
        filter_dict["indication_name"] = indication_name

    # Convert parameters to dicts with "or" logic if they're lists
    if drug_modality:
        filter_dict["drug_modality"] = ensure_dict_format(drug_modality)
    if locations:
        filter_dict["locations"] = ensure_dict_format(locations)
    if target:
        filter_dict["target"] = ensure_dict_format(target)
    if lead_company:
        filter_dict["lead_company"] = lead_company
    if phase:
        filter_dict["phase"] = phase
    if route_of_administration:
        filter_dict["route_of_administration"] = ensure_dict_format(route_of_administration)
    if drug_feature:
        filter_dict["drug_feature"] = ensure_dict_format(drug_feature)
    if drug_name:
        filter_dict["drug_name"] = ensure_dict_format(drug_name)
    if nctids:
        filter_dict["nctids"] = nctids

    body = {"filters": filter_dict, "limit": limit, "page": page}

    # NOTE(review): verify=False disables TLS certificate verification; it is
    # kept because the module also silences InsecureRequestWarning, which
    # suggests it is deliberate for the staging host - confirm.
    response = requests.post(
        clinical_trial_api_url,
        data=json.dumps(body),
        headers=HEADERS,
        timeout=240,
        allow_redirects=True,
        verify=False,
    )
    try:
        ret = response.json()
    except ValueError:
        # JSON decode errors raised by requests are ValueError subclasses;
        # fall back to the raw text instead of swallowing every exception.
        ret = response.text
    return ret

def query_drug_compete(
    location=None,
    drug_modality=None,
    indication_name=None,
    drug_names=None,
    company=None,
    target=None,
    drug_feature=None,
    route_of_administration=None,
    phase=None,
    limit=30,
    page=1
):
    """
    Call the drug-compete API with the provided filters.

    Filters that are falsy (None, empty list, empty dict) are left out of the
    request. Bare lists passed for drug_modality, drug_names, target,
    drug_feature and route_of_administration are wrapped as
    {"data": <list>, "logic": "or"}; every other value is sent unchanged.

    Args:
        location (list): Locations; forwarded as given (unlike the other
            list-or-dict filters it is NOT wrapped with "or" logic)
        drug_modality (list or dict): Drug modality list or dict with data list and logic (or/and)
        indication_name (list): List of indication names
        drug_names (list or dict): Drug names list or dict with data list and logic (or/and)
        company (list): List of lead companies
        target (list or dict): Targets list or dict with data list and logic (or/and)
        drug_feature (list or dict): Drug features list or dict with data list and logic (or/and)
        route_of_administration (list or dict): ROA list or dict with data list and logic (or/and)
        phase (list): List of clinical phases
        limit (int): Number of results to return (documented max 30; not enforced here)
        page (int): Page number for pagination

    Returns:
        dict or str: The decoded JSON body, or the raw response text when the
        body is not valid JSON. The HTTP status code is not checked.
    """

    drug_compete_api_url = "https://staging.noahai.co/api/workflow/drug-compete/"

    # Helper function to convert list to dict with "or" logic if needed
    def ensure_dict_format(param):
        if isinstance(param, list):
            return {"data": param, "logic": "or"}
        return param

    # Build filter object with provided parameters
    filter_dict = {}
    if location:
        filter_dict["location"] = location
    if drug_modality:
        filter_dict["drug_modality"] = ensure_dict_format(drug_modality)
    if indication_name:
        filter_dict["indication_name"] = indication_name
    if drug_names:
        filter_dict["drug_names"] = ensure_dict_format(drug_names)
    if company:
        filter_dict["company"] = company
    if target:
        filter_dict["target"] = ensure_dict_format(target)
    if drug_feature:
        filter_dict["drug_feature"] = ensure_dict_format(drug_feature)
    if route_of_administration:
        filter_dict["route_of_administration"] = ensure_dict_format(route_of_administration)
    if phase:
        filter_dict["phase"] = phase

    body = {"filters": filter_dict, "limit": limit, "page": page}

    # NOTE(review): verify=False disables TLS certificate verification; it is
    # kept because the module also silences InsecureRequestWarning, which
    # suggests it is deliberate for the staging host - confirm.
    response = requests.post(
        drug_compete_api_url,
        data=json.dumps(body),
        headers=HEADERS,
        timeout=240,
        allow_redirects=True,
        verify=False,
    )
    try:
        ret = response.json()
    except ValueError:
        # JSON decode errors raised by requests are ValueError subclasses;
        # fall back to the raw text instead of swallowing every exception.
        ret = response.text
    return ret

In [2]:
# Example keyword arguments for query_clinical_result with every filter set.
full_trial_params_example = {
    "indication_name": ["Degeneration"],
    "drug_modality": {"data": ["Small Molecule Drugs"], "logic": "or"},
    "locations": {"data": ["United States"], "logic": "or"},
    "target": {"data": ["ATP7B gene"], "logic": "or"},
    "lead_company": ["Pfizer Inc. (PFE)"],
    "phase": ["I"],
    "route_of_administration": {"data": ["Intraarterial"], "logic": "or"},
    "drug_feature": {"data": ["505b2"], "logic": "or"},
    "drug_name": {"data": ["Brimochol F"], "logic": "or"},
    "nctids": ["NCT0123"],
}

# Example keyword arguments for query_drug_compete with every filter set.
full_drug_params_example = {
    "location": ["USA"],
    "drug_modality": {"data": ["Protein Degrader"], "logic": "or"},
    "indication_name": ["Porphyria acute"],
    "drug_names": {"data": ["Aiphagan P"], "logic": "or"},
    "company": ["Aravax Pvt Ltd"],
    "target": {
        "data": [
            "FXYD domain containing ion transport regulator 5(RIC, HSPC113, KCT1, PRO6241, FXYD5, OIT2, DYSAD, IWU1)",
        ],
        "logic": "or",
    },
    "drug_feature": {"data": ["Bacterial Product"], "logic": "or"},
    "route_of_administration": {"data": ["Intralymphatic"], "logic": "or"},
    "phase": ["III"],
}

# Minimal single-filter examples: one location filter per endpoint.
trial_params_example = {"locations": {"data": ["United States"], "logic": "or"}}

drug_params_example = {"location": ["USA"]}

# Smoke-test both endpoints with the minimal single-filter examples.
trials_result = query_clinical_result(**trial_params_example)
drugs_result = query_drug_compete(**drug_params_example)

# Preview the first 100 characters of each raw response.
print(str(trials_result)[:100])
print(str(drugs_result)[:100])

# The wrappers return raw text when the body is not JSON, and an error payload
# may lack "results"; normalise to a list so the checks below cannot raise.
trial_rows = trials_result.get('results', []) if isinstance(trials_result, dict) else []
drug_rows = drugs_result.get('results', []) if isinstance(drugs_result, dict) else []

print(len(trial_rows))
print(len(drug_rows))

print(trial_rows[0] if trial_rows else "No results found")
# This line shows the SECOND drug record, so the guard must require at least
# two rows (a non-emptiness check alone raised IndexError for one result).
print(drug_rows[1] if len(drug_rows) > 1 else "No results found")

{'results': [{'id': 59350, 'nct_id': 'NCT04505722', 'primary_id': 'NCT04505722', 'last_updated': '20
{'results': [{'name': 'REC-648647', 'other_names': [], 'lead_company': ['Recursion Pharmaceuticals']
30
30
{'id': 59350, 'nct_id': 'NCT04505722', 'primary_id': 'NCT04505722', 'last_updated': '2022-02-01', 'official_title': 'A Randomized, Double-blind, Placebo-controlled Phase 3 Study to Assess the Efficacy and Safety of Ad26.COV2.S for the Prevention of SARS-CoV-2-mediated COVID-19 in Adults Aged 18 Years and Older', 'lead_company': 'Johnson & Johnson (JNJ)', 'partner_companies': None, 'drug_name': ['Jcovden/Ad26.COV2.S'], 'drug_modality': ['Vaccine'], 'drug_feature': ['Precision Medicine'], 'route_of_administration': ['Intramuscular (IM) Injection'], 'indication_name': ['Coronavirus disease 19 infection'], 'target': ['Immune System', 'SARS-CoV-2'], 'phase': 'III', 'phase_mapping': ['III'], 'current_status': 'Final Data', 'gender': 'Both', 'actual_enrollment': 44325, 'locations': ['Arge

In [35]:
"""
preview_10_templates.py  (v3, 2025-05-19)
────────────────────────────────────────────────────────────
• 10 个模板各打印 1 条真实问句
• 自适应降级：先(适应症+国家)→(适应症)→(国家)→(无过滤)
• 修复 clinical-result 只能用 'locations' 字段的 TypeError
"""

import random, time, sys
from collections import defaultdict
from typing import Dict, Any
from tqdm import tqdm

# ───────────────────────────────────────────────────────────
# 0. Your API wrappers (replace with your actual module/file name)
# ───────────────────────────────────────────────────────────

# ───────────────────────────────────────────────────────────
# 1. Enlarged "hot" candidate pools: 20 indications × 20 locations
# ───────────────────────────────────────────────────────────
HOT_INDICATIONS = [
    # Metabolic / autoimmune
    "Obesity",
    "Type 2 Diabetes",
    "NASH",
    "Hypercholesterolemia",
    "Psoriasis",
    "Systemic lupus erythematosus",
    "Rheumatoid arthritis",
    "Crohn disease",
    "Ulcerative colitis",
    "Atopic dermatitis",
    # High-frequency oncology
    "Non-small cell lung cancer stage IV",
    "Small cell lung cancer",
    "HER2-positive breast cancer",
    "Triple-negative breast cancer",
    "Colorectal cancer",
    "Ovarian cancer",
    "Prostate cancer",
    "Pancreatic cancer",
    "Melanoma",
    "Multiple myeloma",
]

# NOTE(review): the clinical-result example earlier in this notebook filters
# on "United States" while the drug-compete example uses "USA"; confirm that
# both endpoints accept the spellings listed here.
HOT_LOCATIONS = [
    "USA",
    "China",
    "Japan",
    "Germany",
    "France",
    "United Kingdom",
    "Canada",
    "Australia",
    "Brazil",
    "India",
    "Spain",
    "Italy",
    "South Korea",
    "Netherlands",
    "Sweden",
    "Russia",
    "Mexico",
    "South Africa",
    "Saudi Arabia",
    "Singapore",
]

# ───────────────────────────────────────────────────────────
# 2. The 10 question templates
#    Each entry has: "api"  - which wrapper to query ("drug" or "trial"),
#                    "base" - fixed filters always sent for the template,
#                    "text" - callable turning a ctx mapping into the question.
# ───────────────────────────────────────────────────────────
TEMPLATES: Dict[str, Dict[str, Any]] = dict(
    # Drug competitive-landscape templates
    C1=dict(
        api="drug",
        base={},  # relies on the fuzzy filters only
        text=lambda c: (
            f"{c['location']} 市场上的 {c['indication']} 药物竞争格局如何？\n"
            f"参数: indication='{c['indication']}', location='{c['location']}'"
        ),
    ),
    C2=dict(
        api="drug",
        base=dict(
            drug_modality=dict(data=["Antibody-Drug Conjugates, ADCs"], logic="or"),
        ),
        text=lambda c: (
            f"在 {c['location']}，ADC 类药物目前的竞争格局是什么？\n"
            f"参数: modality='ADCs', location='{c['location']}'"
        ),
    ),
    C3=dict(
        api="drug",
        base=dict(
            target=dict(data=["EGFR"], logic="or"),
        ),
        text=lambda c: (
            f"如果要开发新的 EGFR 靶向药，目前 {c['location']} 针对 {c['indication']} 的竞争环境如何？\n"
            f"参数: indication='{c['indication']}', target='EGFR', location='{c['location']}'"
        ),
    ),
    C4=dict(
        api="drug",
        base=dict(
            target=dict(data=["GLP-1"], logic="or"),
            route_of_administration=dict(data=["Oral (PO)"], logic="or"),
        ),
        text=lambda c: (
            f"{c['location']} 口服 GLP-1 药物分别用于哪些适应症、临床阶段分布如何？\n"
            f"参数: target='GLP-1', ROA='Oral', location='{c['location']}'"
        ),
    ),
    C5=dict(
        api="drug",
        base=dict(
            indication_name=["Systemic lupus erythematosus"],
            phase=["Approved"],
        ),
        text=lambda c: (
            f"{c['location']} 已获批的系统性红斑狼疮（SLE）药物有哪些？\n"
            f"参数: indication='SLE', phase='Approved', location='{c['location']}'"
        ),
    ),
    # Clinical-result insight templates (their text ignores the ctx argument)
    I1=dict(
        api="trial",
        base=dict(
            indication_name=["Obesity"],
            drug_name=dict(data=["Semaglutide", "Tirzepatide"], logic="or"),
            phase=["III"],
        ),
        text=lambda c: "Semaglutide 与 Tirzepatide 的 III 期研究相比，哪款减肥效果更好？",
    ),
    I2=dict(
        api="trial",
        base=dict(
            drug_modality=dict(data=["Antibody-Drug Conjugates, ADCs"], logic="or"),
            indication_name=["breast tumor"],
        ),
        text=lambda c: "乳腺癌领域目前表现最优的 ADC 药物是哪一款？",
    ),
    I3=dict(
        api="trial",
        base=dict(
            target=dict(data=["EGFR"], logic="or"),
            phase=["III"],
            indication_name=["Non-small cell lung cancer stage IV"],
        ),
        text=lambda c: "在晚期 NSCLC III 期试验中，哪款 EGFR 靶向药表现最佳？",
    ),
    I4=dict(
        api="trial",
        base=dict(
            drug_name=dict(data=["Kadcyla", "Enhertu"], logic="or"),
            phase=["III"],
        ),
        text=lambda c: "请撰写推广报告，突出 Kadcyla 相比 Enhertu 的 III 期优势。",
    ),
    I5=dict(
        api="trial",
        base=dict(
            drug_modality=dict(data=["siRNA/RNAi"], logic="or"),
        ),
        text=lambda c: "小核酸（siRNA/RNAi）药物在哪些适应症被证实有效？请列出关键证据。",
    ),
)

# ───────────────────────────────────────────────────────────
# 3. Generic: reduce a returned Noah record to a template ctx
# ───────────────────────────────────────────────────────────
def extract_ctx(item: Dict) -> Dict[str, str]:
    """
    Pull the location and indication out of one API record.

    Args:
        item: A single record; the keys read are "location" / "locations"
            and "indication_name" / "indication".

    Returns:
        A defaultdict(str) with "location" and "indication" set; any other
        key looked up later yields "" instead of raising KeyError.
    """
    ctx = defaultdict(str)
    # location: either key may be present; the first truthy one wins
    loc = item.get("location") or item.get("locations") or ""
    if isinstance(loc, list):
        # An empty list is falsy and was already replaced by "" above.
        loc = loc[0]
    elif isinstance(loc, dict):
        # Filter-style dict: take the first entry of "data". A missing, None
        # or empty "data" yields "" instead of raising IndexError/TypeError.
        data = loc.get("data") or [""]
        loc = data[0]
    ctx["location"] = loc

    # indication
    ind = item.get("indication_name") or item.get("indication") or ""
    if isinstance(ind, list):
        ind = ind[0]
    ctx["indication"] = ind
    return ctx

# ───────────────────────────────────────────────────────────
# 4. Add the fuzzy filters in the shape each API type expects
#    drug:  location → 'location'
#    trial: location → 'locations':{'data': …}
# ───────────────────────────────────────────────────────────
def merge_filters(api_type: str, base: Dict, ind: str = None, loc: str = None) -> Dict:
    """
    Return a shallow copy of ``base`` extended with the fuzzy filters.

    Keys already present in ``base`` are never overwritten. A truthy ``ind``
    becomes ``indication_name=[ind]``. A truthy ``loc`` becomes
    ``location=[loc]`` when ``api_type`` is "drug", otherwise
    ``locations={"data": [loc], "logic": "or"}``.
    """
    merged = base.copy()

    if ind and "indication_name" not in merged:
        merged["indication_name"] = [ind]

    if not loc:
        return merged

    if api_type == "drug":
        key, value = "location", [loc]
    else:  # trial
        key, value = "locations", {"data": [loc], "logic": "or"}
    if key not in merged:
        merged[key] = value
    return merged

# ───────────────────────────────────────────────────────────
# 5. Adaptive fallback query: relax the filters step by step until data comes back
# ───────────────────────────────────────────────────────────
def fetch_one(api_type: str, base: Dict, max_try: int = 60):
    """
    Query with progressively looser fuzzy filters until a response has results.

    Tiers are tried in this order:
    (indication + location) → (indication only) → (location only) → (no
    fuzzy filter). Candidates inside a tier are picked at random, but the
    tiers themselves stay ordered. Previously all combinations were shuffled
    together, so with the default budget the looser tiers - and the final
    unfiltered query - were rarely reached.

    Args:
        api_type: "drug" queries query_drug_compete; anything else queries
            query_clinical_result.
        base: Fixed filters of the template; merged with the fuzzy filters
            by merge_filters (base keys win).
        max_try: Upper bound on the number of API calls. One call is reserved
            for the unfiltered query; the rest is split across the tiers.

    Returns:
        (record, ctx) for a randomly chosen result, or (None, None) when no
        attempted query returned results.
    """
    if max_try <= 0:
        return None, None

    # Tiers ordered from strictest to loosest.
    tiers = [
        [(ind, loc) for ind in HOT_INDICATIONS for loc in HOT_LOCATIONS],
        [(ind, None) for ind in HOT_INDICATIONS],
        [(None, loc) for loc in HOT_LOCATIONS],
    ]

    # Share out the budget starting from the loosest tier so the small tiers
    # get a fair share and whatever is left flows to the large pair tier.
    remaining = max_try - 1  # one attempt is kept for the unfiltered query
    quotas = [0] * len(tiers)
    for idx in range(len(tiers) - 1, -1, -1):
        quotas[idx] = min(len(tiers[idx]), remaining // (idx + 1))
        remaining -= quotas[idx]

    combos = []
    for tier, quota in zip(tiers, quotas):
        # Random picks within a tier spread load away from the hottest combos.
        combos.extend(random.sample(tier, quota))
    combos.append((None, None))

    query = query_drug_compete if api_type == "drug" else query_clinical_result
    for ind, loc in combos:
        filt = merge_filters(api_type, base, ind, loc)
        resp = query(**filt)

        # The wrappers return raw text for non-JSON bodies; treat that as a miss.
        results = resp.get("results", []) if isinstance(resp, dict) else []
        if results:
            rec = random.choice(results)
            return rec, extract_ctx(rec)
        time.sleep(0.3)      # light rate limiting between misses
    return None, None

# ───────────────────────────────────────────────────────────
# 6. Main flow: one preview question per template
# ───────────────────────────────────────────────────────────
def main():
    """Fetch one record per template and print its question, or a miss notice."""
    print("\n=== 每个模板生成 1 条预览 ===")
    for key, cfg in tqdm(TEMPLATES.items(), desc="模板进度"):
        rec, ctx = fetch_one(cfg["api"], cfg["base"])
        if not rec:
            print(f"\n× {key} - 连降级后仍未找到数据（staging 数据过少？）")
            continue
        print(f"\n★ {key}\n{cfg['text'](ctx)}")

if __name__ == "__main__":
    main()



=== 每个模板生成 1 条预览 ===


模板进度:   0%|          | 0/10 [00:00<?, ?it/s]

模板进度:  10%|█         | 1/10 [00:01<00:13,  1.54s/it]


★ C1
Netherlands 市场上的 Obesity 药物竞争格局如何？
参数: indication='Obesity', location='Netherlands'


模板进度:  20%|██        | 2/10 [00:03<00:11,  1.50s/it]


★ C2
在 Netherlands，ADC 类药物目前的竞争格局是什么？
参数: modality='ADCs', location='Netherlands'


模板进度:  30%|███       | 3/10 [00:47<02:28, 21.27s/it]


× C3 - 连降级后仍未找到数据（staging 数据过少？）


模板进度:  40%|████      | 4/10 [01:33<03:05, 30.85s/it]


× C4 - 连降级后仍未找到数据（staging 数据过少？）


模板进度:  50%|█████     | 5/10 [02:01<02:28, 29.76s/it]


★ C5
Germany 已获批的系统性红斑狼疮（SLE）药物有哪些？
参数: indication='SLE', phase='Approved', location='Germany'


模板进度:  60%|██████    | 6/10 [02:01<01:19, 19.90s/it]


★ I1
Semaglutide 与 Tirzepatide 的 III 期研究相比，哪款减肥效果更好？


模板进度:  70%|███████   | 7/10 [02:45<01:22, 27.61s/it]


× I2 - 连降级后仍未找到数据（staging 数据过少？）


模板进度:  80%|████████  | 8/10 [03:29<01:05, 32.79s/it]


× I3 - 连降级后仍未找到数据（staging 数据过少？）


模板进度:  90%|█████████ | 9/10 [04:14<00:36, 36.68s/it]


× I4 - 连降级后仍未找到数据（staging 数据过少？）


模板进度: 100%|██████████| 10/10 [04:57<00:00, 29.72s/it]


× I5 - 连降级后仍未找到数据（staging 数据过少？）





In [37]:
import requests
import json

def call_slot_fill_tool(prompt, tool, language="en"):
    """
    Question -> query parameters, used to query Noah AI database, text in json out
    Expected time to run: < 15s
    Output: {'result': 'success', 'data': <dict>}
    Available tools include:
    [
        "General-Inference",
        "Medical-Search",
        "Web-Search",
        "Clinical-Trial-Result-Analysis",
        "Drug-Analysis",
        "Catalyst-Event-Analysis"
    ]

    Args:
        prompt (str): User question sent as "user_prompt"
        tool (str): One of the tool names listed above
        language (str): Language code sent as "language" (default "en")

    Returns:
        dict or str: The decoded JSON body, or the raw response text when the
        body is not valid JSON.
    """
    noah_api_url = "https://staging.noahai.co/api/tool_test/"
    # The language argument used to be ignored (the body always sent "en");
    # it is now forwarded, matching call_tool and call_agent.
    body = {"language": language, "user_prompt": prompt, "tool": tool, "slot_fill": True}
    # NOTE(security): the token below is a credential committed to source;
    # prefer loading it from configuration and rotate this value.
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": "Token ab2af44c17490f0c3c3b221b0f6fc2c20d62590a"}
    response = requests.post(noah_api_url, data=json.dumps(body), headers=headers, timeout=30, allow_redirects=True, verify=False)
    try:
        ret = response.json()
    except ValueError:
        # JSON decode errors raised by requests are ValueError subclasses.
        ret = response.text
    return ret

def call_tool(prompt, tool, language="en"):
    """
    Question -> report, text in text out
    Expected time to run: ~2min
    Output: {'result': 'success', 'data': <txt>}
    Available tools include:
    [
        "General-Inference",
        "Medical-Search",
        "Web-Search",
        "Clinical-Trial-Result-Analysis",
        "Drug-Analysis",
        "Catalyst-Event-Analysis"
    ]

    Args:
        prompt (str): User question sent as "user_prompt"
        tool (str): One of the tool names listed above
        language (str): Language code sent as "language" (default "en")

    Returns:
        dict or str: The decoded JSON body, or the raw response text when the
        body is not valid JSON.
    """
    noah_api_url = "https://staging.noahai.co/api/tool_test/"
    body = {"language": language, "user_prompt": prompt, "tool": tool, "slot_fill": False}
    # NOTE(security): the token below is a credential committed to source;
    # prefer loading it from configuration and rotate this value.
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": "Token ab2af44c17490f0c3c3b221b0f6fc2c20d62590a"}
    response = requests.post(noah_api_url, data=json.dumps(body), headers=headers, timeout=240, allow_redirects=True, verify=False)
    try:
        ret = response.json()
    except ValueError:
        # JSON decode errors raised by requests are ValueError subclasses.
        ret = response.text
    return ret

def call_agent(prompt, language="en"):
    """
    Question -> report, text in text out
    Expected time to run: ~10-12min
    Output: {'result': 'success', 'data': <txt>}

    Args:
        prompt (str): User question sent as "user_prompt"
        language (str): Language code sent as "language" (default "en")

    Returns:
        dict or str: The decoded JSON body, or the raw response text when the
        body is not valid JSON.
    """
    noah_api_url = "https://staging.noahai.co/api/tool_test/"
    # Unlike call_tool / call_slot_fill_tool, no "slot_fill" key is sent here.
    body = {"language": language, "user_prompt": prompt, "tool": "agent"}
    # NOTE(security): the token below is a credential committed to source;
    # prefer loading it from configuration and rotate this value.
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": "Token b85ee49cde4db576eee2522c6c340e7f967537e2"}
    response = requests.post(noah_api_url, data=json.dumps(body), headers=headers, timeout=1200, allow_redirects=True, verify=False)
    try:
        ret = response.json()
    except ValueError:
        # JSON decode errors raised by requests are ValueError subclasses.
        ret = response.text
    return ret

# Manual smoke test; the prompt is Chinese for "latest competitive landscape
# of weight-loss drugs". The two commented lines are the alternative entry points.
#print(call_slot_fill_tool('减肥药的最新竞争格局', 'Drug-Analysis'))
# print(call_tool('减肥药的最新竞争格局', 'Drug-Analysis'))
# NOTE(review): the recorded output of this call is an {'error': ...} payload
# returned by the API rather than a report; the cause is not visible from
# this file - TODO confirm with the service owners.
print(call_agent('减肥药的最新竞争格局'))


{'error': 'the JSON object must be str, bytes or bytearray, not dict'}
