In [5]:
# -*- coding: utf-8 -*-
"""
PhenoTagger FastAPI Usage Examples
Examples showing how to use the FastAPI endpoints
"""

import requests
import json
import time

# API base URL (change this to your server URL)
BASE_URL = "http://192.168.5.77:8111"

def test_health_check():
    """Query the ``/health`` endpoint and print a short status report.

    On an HTTP 200 reply the ``status`` and ``model_info`` fields of the JSON
    body are printed; on any other status code the code itself is printed.
    A blank line is always printed last. Returns nothing.
    """
    print("=== Health Check ===")

    response = requests.get(f"{BASE_URL}/health")
    succeeded = response.status_code == 200

    if not succeeded:
        print(f"✗ Health check failed: {response.status_code}")
    else:
        data = response.json()
        print(f"Status: {data['status']}")
        print(f"Model: {data['model_info']}")
        print("✓ API is healthy")
    print()


def batch_annotation(texts, threshold=0.95, only_longest=False, abbr_recognition=True):
    """POST a list of texts to ``/annotate/batch`` and return the parsed reply.

    Args:
        texts: List of strings sent as the ``texts`` field of the payload.
        threshold: Value sent as the ``threshold`` field.
        only_longest: Value sent as the ``only_longest`` field.
        abbr_recognition: Value sent as the ``abbr_recognition`` field.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (after printing
        the status code and response text). Callers must check for ``None``
        before indexing the result.
    """
    data = {
        "texts": texts,
        "threshold": threshold,
        "only_longest": only_longest,
        "abbr_recognition": abbr_recognition
    }

    response = requests.post(f"{BASE_URL}/annotate/batch", json=data)

    if response.status_code != 200:
        print(f"✗ Batch annotation failed: {response.status_code}")
        print(response.text)
        print()
        return None

    result = response.json()
    print(f"Processed {result['count']} texts in {result['total_processing_time']:.3f}s")
    # If the server reports zero processed texts, skip the average instead of
    # dividing by zero.
    if result['count']:
        print(f"Average time per text: {result['total_processing_time']/result['count']:.3f}s")
    return result

def test_configuration():
    """Fetch ``/config`` and print the model info and processing parameters.

    On HTTP 200, every key/value pair under ``model_info`` is printed, followed
    by ``model_type``, ``ML_Threshold`` and ``onlyLongest`` read from
    ``environment.processing_params``. On any other status the status code is
    printed. A blank line is printed last in both cases. Returns nothing.
    """
    print("=== Configuration ===")

    response = requests.get(f"{BASE_URL}/config")

    # Failure path first so the success path reads straight through.
    if response.status_code != 200:
        print(f"✗ Configuration failed: {response.status_code}")
        print()
        return

    config = response.json()

    print("Model Info:")
    for key, value in config['model_info'].items():
        print(f"  {key}: {value}")

    print("\nEnvironment:")
    env_info = config['environment']
    print(f"  Model Type: {env_info['processing_params']['model_type']}")
    print(f"  Threshold: {env_info['processing_params']['ML_Threshold']}")
    print(f"  Only Longest: {env_info['processing_params']['onlyLongest']}")
    print("✓ Configuration retrieved")
    print()

def test_parameter_variations():
    """Annotate one fixed sentence under three parameter combinations.

    Each combination is POSTed to ``/annotate`` together with the text. For an
    HTTP 200 reply the parameters, ``hpo_terms``, ``hpo_ids`` and
    ``processing_time`` are printed; otherwise the failing status code is
    printed. A blank line follows every attempt. Returns nothing.
    """
    print("=== Parameter Variations ===")

    text = "The patient has developmental delay, seizures, and intellectual disability."

    # One tuple per run, in the order given by option_names.
    option_names = ("threshold", "only_longest", "abbr_recognition")
    option_values = [
        (0.95, True, False),
        (0.90, False, True),
        (0.85, True, True),
    ]
    parameter_sets = [dict(zip(option_names, values)) for values in option_values]

    print(f"Testing text: {text}\n")

    for i, params in enumerate(parameter_sets, start=1):
        data = dict(text=text, **params)
        response = requests.post(f"{BASE_URL}/annotate", json=data)

        if response.status_code != 200:
            print(f"Test {i} failed: {response.status_code}")
        else:
            result = response.json()
            print(f"Test {i} - Parameters: {params}")
            print(f"HPO Terms: {result['hpo_terms']}")
            print(f"HPO IDs: {result['hpo_ids']}")
            print(f"Time: {result['processing_time']:.3f}s")
        print()

def test_large_batch():
    """Send a 500-text batch to ``/annotate/batch`` and report timing.

    The batch is five fixed sentences repeated 100 times. The round trip is
    timed locally with ``time.time()`` and printed next to the
    ``total_processing_time`` reported in the reply, followed by the first two
    entries of ``results``. On a non-200 reply the status code and response
    text are printed. A blank line is printed last. Returns nothing.
    """
    print("=== Large Batch Test ===")

    # Seed sentences that are cycled to build the payload.
    base_texts = [
        "Patient has seizures and developmental delay.",
        "Clinical findings include intellectual disability.",
        "Neurological examination shows tremor and ataxia.",
        "The child exhibits growth retardation.",
        "Patient presents with microcephaly and motor delays."
    ]

    # 5 sentences x 100 repetitions = 500 texts, in the same cyclic order.
    large_batch = [sentence for _ in range(100) for sentence in base_texts]

    print(f"Testing with {len(large_batch)} texts...")

    data = dict(texts=large_batch, threshold=0.95, only_longest=True)

    # Wall-clock time around the request only (includes network transfer).
    start_time = time.time()
    response = requests.post(f"{BASE_URL}/annotate/batch", json=data)
    request_time = time.time() - start_time

    if response.status_code != 200:
        print(f"✗ Large batch test failed: {response.status_code}")
        print(response.text)
    else:
        result = response.json()
        print(f"✓ Successfully processed {result['count']} texts")
        print(f"Server processing time: {result['total_processing_time']:.3f}s")
        print(f"Total request time: {request_time:.3f}s")
        print(f"Average time per text: {result['total_processing_time']/result['count']:.4f}s")

        # Only the first two annotations are shown to keep the output short.
        print("\nSample results:")
        for i, annotation in enumerate(result['results'][:2], start=1):
            print(f"{i}. Text: {annotation['text']}")
            print(f"   HPO IDs: {annotation['hpo_ids']}")
    print()

def test_error_handling():
    """Print the section banner for the error-handling checks.

    As written, this function issues no request; it only emits the header
    line. Returns nothing.
    """
    banner = "=== Error Handling ==="
    print(banner)

In [6]:
# Fetch the server's /config report and print it (see the output below).
test_configuration()

=== Configuration ===
Model Info:
  model_type: pubmedbert
  threshold: 0.95
  only_longest: False
  abbr_recognition: True
  api_version: 2.0

Environment:
  Model Type: pubmedbert
  Threshold: 0.95
  Only Longest: False
✓ Configuration retrieved



In [30]:
import pandas as pd

def add_hpo_evaluation_metrics(df, true_hpo_col, predict_hpo_col):
    """
    Add per-row HPO evaluation columns to a copy of a DataFrame.

    Each cell of the two HPO columns is read as a ';'-separated list of terms
    and turned into a set. Missing values and empty strings give an empty set;
    surrounding whitespace and empty tokens (e.g. from a trailing ';') are
    discarded. Every row is then scored independently.

    Args:
        df: Input DataFrame. It is not modified.
        true_hpo_col: Name of the column holding the true HPO terms.
        predict_hpo_col: Name of the column holding the predicted HPO terms.

    Returns:
        A copy of ``df`` with the columns TP, FP, TN, FN (int64) and
        precision, recall, f1_score (float64) added or overwritten. TN counts
        the terms from the union of all true terms in the dataset that appear
        in neither the row's true set nor its predicted set. Any ratio whose
        denominator is zero is reported as 0.0.
    """

    def _parse_terms(cell):
        # Missing values and empty strings both mean "no terms".
        if pd.isna(cell) or not cell:
            return set()
        # Strip padding and drop empty tokens so "a; b;" parses as {"a", "b"}.
        return {token.strip() for token in cell.split(';') if token.strip()}

    # Work on a copy so the caller's frame is left untouched.
    result_df = df.copy()

    true_sets = [_parse_terms(cell) for cell in result_df[true_hpo_col]]
    pred_sets = [_parse_terms(cell) for cell in result_df[predict_hpo_col]]

    # Universe of true terms across the dataset; only used to compute TN.
    all_true_hpos = set().union(*true_sets)

    print(f"資料集中共有 {len(all_true_hpos)} 個不同的真實HPO")

    int_columns = ('TP', 'FP', 'TN', 'FN')
    float_columns = ('precision', 'recall', 'f1_score')
    collected = {name: [] for name in int_columns + float_columns}

    # Score row by row, collecting values positionally. Writing them back as
    # whole columns (instead of result_df.loc[idx, ...] per row) stays correct
    # when the index contains duplicate labels and avoids per-cell overhead.
    for true_hpos, pred_hpos in zip(true_sets, pred_sets):
        tp = len(true_hpos & pred_hpos)  # predicted and true
        fp = len(pred_hpos - true_hpos)  # predicted but not true
        fn = len(true_hpos - pred_hpos)  # true but not predicted
        tn = len(all_true_hpos - true_hpos - pred_hpos)  # in the dataset, absent from this row

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        f1_score = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

        for name, value in zip(collected, (tp, fp, tn, fn, precision, recall, f1_score)):
            collected[name].append(value)

    # Plain arrays are assigned by position, so the frame's index is irrelevant.
    for name in int_columns:
        result_df[name] = pd.Series(collected[name], dtype='int64').to_numpy()
    for name in float_columns:
        result_df[name] = pd.Series(collected[name], dtype='float64').to_numpy()

    return result_df

def get_overall_metrics(df):
    """
    Aggregate per-row confusion counts into micro- and macro-averaged metrics.

    Micro metrics are computed from the column sums of TP/FP/TN/FN. Macro
    metrics are the plain means of the per-row ``precision``, ``recall`` and
    ``f1_score`` columns, plus the mean of the per-row accuracy.

    Args:
        df: DataFrame with the columns TP, FP, TN, FN, precision, recall and
            f1_score.

    Returns:
        dict: sample count, summed counts, ``micro_*`` and ``macro_*`` metrics,
        and the legacy ``overall_*`` / ``avg_*`` aliases of the same values.
    """

    def _ratio(numerator, denominator):
        # A zero denominator is reported as 0.0 rather than raising.
        return numerator / denominator if denominator > 0 else 0.0

    total_tp, total_fp, total_tn, total_fn = (
        df[column].sum() for column in ('TP', 'FP', 'TN', 'FN')
    )

    # Micro-average: pool the counts first, then compute each metric once.
    micro_precision = _ratio(total_tp, total_tp + total_fp)
    micro_recall = _ratio(total_tp, total_tp + total_fn)
    micro_f1 = _ratio(2 * micro_precision * micro_recall, micro_precision + micro_recall)
    micro_accuracy = _ratio(total_tp + total_tn, total_tp + total_fp + total_tn + total_fn)

    # Macro-average: mean of the per-row metrics (per the original author's
    # note, this is the averaging used by the reference paper).
    macro_precision = df['precision'].mean()
    macro_recall = df['recall'].mean()
    macro_f1 = df['f1_score'].mean()
    row_correct = df['TP'] + df['TN']
    row_total = df['TP'] + df['FP'] + df['TN'] + df['FN']
    macro_accuracy = (row_correct / row_total).mean()

    summary = {
        'total_samples': len(df),
        'total_TP': total_tp,
        'total_FP': total_fp,
        'total_TN': total_tn,
        'total_FN': total_fn,
    }
    # Micro-average (pooled) metrics.
    summary.update({
        'micro_precision': micro_precision,
        'micro_recall': micro_recall,
        'micro_f1_score': micro_f1,
        'micro_accuracy': micro_accuracy,
    })
    # Macro-average (per-row mean) metrics.
    summary.update({
        'macro_precision': macro_precision,
        'macro_recall': macro_recall,
        'macro_f1_score': macro_f1,
        'macro_accuracy': macro_accuracy,
    })
    # Old key names kept for backward compatibility.
    summary.update({
        'overall_precision': micro_precision,
        'overall_recall': micro_recall,
        'overall_f1_score': micro_f1,
        'overall_accuracy': micro_accuracy,
        'avg_precision': macro_precision,
        'avg_recall': macro_recall,
        'avg_f1_score': macro_f1,
    })
    return summary

# Usage example
if __name__ == "__main__":
    # Five-patient toy dataset with true and predicted HPO id strings.
    sample_data = {
        'patient_id': ['P001', 'P002', 'P003', 'P004', 'P005'],
        'true_hpo': [
            'HP:0001250;HP:0002376;HP:0001263',
            'HP:0001250;HP:0003674',
            'HP:0002376;HP:0001263;HP:0004944',
            'HP:0001250',
            'HP:0003674;HP:0004944'
        ],
        'pred_hpo': [
            'HP:0001250;HP:0001263;HP:0003674',
            'HP:0001250;HP:0002376',
            'HP:0002376;HP:0001263',
            'HP:0001250;HP:0004944',
            'HP:0003674'
        ]
    }

    df = pd.DataFrame(sample_data)
    # Show the raw input, then a separator rule.
    print("原始DataFrame:", df, "\n" + "="*80, sep="\n")

    # Per-patient confusion counts and scores.
    result_df = add_hpo_evaluation_metrics(df, 'true_hpo', 'pred_hpo')

    print(
        "\n添加評估指標後的DataFrame:",
        result_df[['patient_id', 'TP', 'FP', 'TN', 'FN', 'precision', 'recall', 'f1_score']].round(3),
        sep="\n",
    )

    # Dataset-level summary.
    overall = get_overall_metrics(result_df)
    print(
        "\n" + "="*80,
        "整體評估指標:",
        "="*80,
        f"總樣本數: {overall['total_samples']}",
        f"總TP: {overall['total_TP']}, 總FP: {overall['total_FP']}",
        f"總TN: {overall['total_TN']}, 總FN: {overall['total_FN']}",
        sep="\n",
    )

    # Micro-averaged (pooled) metrics.
    print(
        "\n【Micro-average 指標】(全局計算)",
        f"Micro Precision(TP / TP+FP): {overall['micro_precision']:.3f}",
        f"Micro Recall: {overall['micro_recall']:.3f}",
        f"Micro F1-Score: {overall['micro_f1_score']:.3f}",
        f"Micro Accuracy: {overall['micro_accuracy']:.3f}",
        sep="\n",
    )

    # Macro-averaged (per-row mean) metrics.
    print(
        "\n【Macro-average 指標】(論文使用的方法)",
        f"Macro Precision: {overall['macro_precision']:.3f}",
        f"Macro Recall: {overall['macro_recall']:.3f}",
        f"Macro F1-Score: {overall['macro_f1_score']:.3f}",
        f"Macro Accuracy: {overall['macro_accuracy']:.3f}",
        sep="\n",
    )

    print("\n※ 論文中的結果應該對比 Macro-average 指標")

原始DataFrame:
  patient_id                          true_hpo  \
0       P001  HP:0001250;HP:0002376;HP:0001263   
1       P002             HP:0001250;HP:0003674   
2       P003  HP:0002376;HP:0001263;HP:0004944   
3       P004                        HP:0001250   
4       P005             HP:0003674;HP:0004944   

                           pred_hpo  
0  HP:0001250;HP:0001263;HP:0003674  
1             HP:0001250;HP:0002376  
2             HP:0002376;HP:0001263  
3             HP:0001250;HP:0004944  
4                        HP:0003674  

資料集中共有 5 個不同的真實HPO

添加評估指標後的DataFrame:
  patient_id  TP  FP  TN  FN  precision  recall  f1_score
0       P001   2   1   1   1      0.667   0.667     0.667
1       P002   1   1   2   1      0.500   0.500     0.500
2       P003   2   0   2   1      1.000   0.667     0.800
3       P004   1   1   3   0      0.500   1.000     0.667
4       P005   1   0   3   1      1.000   0.500     0.667

整體評估指標:
總樣本數: 5
總TP: 7, 總FP: 3
總TN: 11, 總FN: 4

【Micro-average 指標】(全局

In [8]:
def hpo_reformat(x):
    """Replace '_' with ':' in every id of a ';'-separated string.

    The '-' placeholder is returned unchanged.
    """
    if x == '-':
        return x
    converted = map(lambda term: term.replace('_', ':'), x.split(';'))
    return ';'.join(converted)

In [9]:
import pandas as pd
# Load the BiolarkGSC reference file; it is tab-separated despite the .csv extension.
biolarkgsc = pd.read_csv('../PhenoBERT/reference/HPO concept recognition/BiolarkGSC/biolarkgsc.csv', sep='\t')
# The COPD-HPO and PhenoCHF loads below are disabled; only BiolarkGSC is loaded here.
# cpod = pd.read_csv('../PhenoBERT/reference/HPO concept recognition/COPD-HPO/copd.csv',sep='\t')
# phenochf = pd.read_csv('../PhenoBERT/reference/HPO concept recognition/PhenoCHF/phenochf.csv',sep='\t')

In [10]:
# Tag every row with the name of its source corpus.
biolarkgsc['dataset'] = 'biolarkgsc'
# phenochf['dataset'] = 'phenochf'
# cpod['dataset'] = 'copd'

#total_dataset = pd.concat([biolarkgsc, cpod, phenochf], ignore_index=True)
# Missing labels become the '-' placeholder, which hpo_reformat returns unchanged.
biolarkgsc['labels'] = biolarkgsc['labels'].fillna('-')
# New column with '_' replaced by ':' in each ';'-separated id.
biolarkgsc['hpo_ids'] = biolarkgsc['labels'].apply(hpo_reformat)

In [23]:
# Rename the text column to the name used in the rest of the notebook.
biolarkgsc = biolarkgsc.rename(columns={'text':'clinical_summary'})
# Missing summaries get the '-' placeholder so they can be filtered out below.
biolarkgsc['clinical_summary'] = biolarkgsc['clinical_summary'].fillna('-')
# Keep only these four columns.
biolarkgsc = biolarkgsc[['id','clinical_summary','hpo_ids','dataset']]
# Drop rows whose summary or HPO id string is the '-' placeholder.
biolarkgsc = biolarkgsc[(biolarkgsc['clinical_summary'] !='-') & (biolarkgsc['hpo_ids'] != '-')]
# Number of rows remaining after the filter.
biolarkgsc.shape[0]

228

In [12]:
# Mapping table with one row per input HPO id and the main id it maps to.
df_hpo_mapping = pd.read_csv('../PhenoBERT/reference/2025-08-01_orphanet_WGS_database(HPO_ID_Mapping_v20250506).csv', sep=',')

# Lookup from input_hpo_id to mapped_main_id; if an input id repeats, the last row wins.
hpo_mapping_dict = dict(zip(df_hpo_mapping['input_hpo_id'], df_hpo_mapping['mapped_main_id']))

def hpo_map(hpo_id_list_str, mapping=None):
    """Map a ';'-separated string of HPO ids to their main ids, without repeats.

    Args:
        hpo_id_list_str: ';'-separated ids, or the '-' placeholder.
        mapping: Optional dict from input id to main id. Defaults to the
            module-level ``hpo_mapping_dict``.

    Returns:
        The mapped ids joined with ';', in first-seen order, each appearing
        once. An id missing from the mapping is represented by '-'. The '-'
        placeholder input is returned unchanged.
    """
    if hpo_id_list_str == '-':
        return '-'
    if mapping is None:
        mapping = hpo_mapping_dict

    # De-duplicate on the *mapped* id: dict keys keep insertion order, so the
    # first occurrence of each main id fixes its position.
    # NOTE(review): every unmapped id collapses to the same '-' token, so two
    # different unmapped ids compare equal downstream; confirm this is intended.
    unique_main_ids = dict.fromkeys(
        mapping.get(hpo_id, '-') for hpo_id in hpo_id_list_str.split(';')
    )
    return ';'.join(unique_main_ids)

def accuracy_calculator(true_hpo_str, pred_hpo_str):
    """Return the fraction of distinct true HPO ids that also appear in the prediction.

    Both arguments are ';'-separated id strings. Ids are compared as sets, so
    repeated ids on either side cannot inflate the numerator or denominator,
    and the result always lies in [0, 1]. Despite the name, this is the share
    of true ids that were recovered (a recall-style score).

    Args:
        true_hpo_str: ';'-separated reference ids.
        pred_hpo_str: ';'-separated predicted ids.

    Returns:
        float: ``len(true & pred) / len(true)``.
    """
    true_ids = set(true_hpo_str.split(';'))
    pred_ids = set(pred_hpo_str.split(';'))
    # str.split always yields at least one token, so true_ids is never empty.
    return len(true_ids & pred_ids) / len(true_ids)

In [50]:
# Label for this run's output.
# NOTE(review): the /config output captured earlier in this notebook reports
# model_type 'pubmedbert'; confirm which model the server was serving here.
print('model:bioformer')
# Annotate every clinical summary in one batch request. batch_annotation
# returns None when the request fails, so later cells that index the result
# assume this call succeeded.
pheno_tagger_output = batch_annotation(
    biolarkgsc['clinical_summary'].to_list(),
    threshold=0.95, only_longest=True, abbr_recognition=True)

model:bioformer


Processed 228 texts in 68.937s
Average time per text: 0.302s


In [51]:
# Pull the 'hpo_ids' field out of every entry in the batch reply.
pheno_tagger_hpo_ids = [data['hpo_ids'] for data in pheno_tagger_output['results']]
# Positional assignment: assumes one result per input text, in input order (TODO confirm against the API).
biolarkgsc['pheno_tagger_hpo_ids'] = pheno_tagger_hpo_ids
# Run both the reference and the predicted id strings through hpo_map before comparing them.
biolarkgsc['normalized_hpo_ids'] = biolarkgsc['hpo_ids'].apply(hpo_map)
biolarkgsc['normalized_pheno_tagger_hpo_ids'] = biolarkgsc['pheno_tagger_hpo_ids'].apply(hpo_map)
# Row-wise score from accuracy_calculator(reference ids, predicted ids).
biolarkgsc['pheno_tagger_accuracy'] = biolarkgsc.apply(lambda x:accuracy_calculator(x.normalized_hpo_ids, x.normalized_pheno_tagger_hpo_ids), axis=1)

In [53]:
# Score every row of the corpus, then summarise across the whole dataset.
biolarkgsc = add_hpo_evaluation_metrics(biolarkgsc,'normalized_hpo_ids','normalized_pheno_tagger_hpo_ids')
overall = get_overall_metrics(biolarkgsc)

# Header and pooled confusion counts.
print(
    "\n" + "="*80,
    "整體評估指標:",
    "="*80,
    f"總樣本數: {overall['total_samples']}",
    f"總TP: {overall['total_TP']}, 總FP: {overall['total_FP']}",
    f"總TN: {overall['total_TN']}, 總FN: {overall['total_FN']}",
    sep="\n",
)

# Micro-averaged (pooled) metrics.
print(
    "\n【Micro-average 指標】(全局計算)",
    f"Micro Precision (TP/TP+FP): {overall['micro_precision']:.3f}",
    f"Micro Recall (TP/TP+FN): {overall['micro_recall']:.3f}",
    f"Micro F1-Score (2 * Recall * Precision / (Recall+Precision)): {overall['micro_f1_score']:.3f}",
    f"Micro Accuracy: {overall['micro_accuracy']:.3f}",
    sep="\n",
)

# Macro-averaged (per-row mean) metrics.
print(
    "\n【Macro-average 指標】(論文使用的方法)",
    f"Macro Precision (TP/TP+FP): {overall['macro_precision']:.3f}",
    f"Macro Recall (TP/TP+FN): {overall['macro_recall']:.3f}",
    f"Macro F1-Score (2 * Recall * Precision / (Recall+Precision)): {overall['macro_f1_score']:.3f}",
    f"Macro Accuracy: {overall['macro_accuracy']:.3f}",
    sep="\n",
)

print("\n※ 論文中的結果應該對比 Macro-average 指標")

資料集中共有 461 個不同的真實HPO



整體評估指標:
總樣本數: 228
總TP: 852, 總FP: 203
總TN: 103342, 總FN: 855

【Micro-average 指標】(全局計算)
Micro Precision (TP/TP+FP): 0.808
Micro Recall (TP/TP+FN): 0.499
Micro F1-Score (2 * Recall * Precision / (Recall+Precision)): 0.617
Micro Accuracy: 0.990

【Macro-average 指標】(論文使用的方法)
Macro Precision (TP/TP+FP): 0.775
Macro Recall (TP/TP+FN): 0.503
Macro F1-Score (2 * Recall * Precision / (Recall+Precision)): 0.590
Macro Accuracy: 0.990

※ 論文中的結果應該對比 Macro-average 指標


In [26]:
# Mean of the per-row accuracy_calculator scores across the corpus.
biolarkgsc['pheno_tagger_accuracy'].mean()

0.5837626325546885

In [35]:
# Number of rows that contributed to the mean above.
biolarkgsc['pheno_tagger_accuracy'].shape[0]

228

In [43]:
# Preview the first rows to compare reference and predicted id strings.
biolarkgsc.head()

Unnamed: 0,id,clinical_summary,hpo_ids,dataset,pheno_tagger_hpo_ids,normalized_hpo_ids,normalized_pheno_tagger_hpo_ids,pheno_tagger_accuracy
0,1003450,A syndrome of brachydactyly (absence of some m...,HP:0001156;HP:0009881;HP:0001798;HP:0001792;HP...,biolarkgsc,HP:0001156;HP:0008386;HP:0001792;HP:0006152;HP...,HP:0001156;HP:0009881;HP:0001798;HP:0001792;HP...,HP:0001156;HP:0008386;HP:0001792;HP:0006152;HP...,0.4
1,10051003,Townes-Brocks syndrome (TBS) is an autosomal d...,HP:0000006;HP:0000006;HP:0000006;HP:0003828;HP...,biolarkgsc,HP:0000356;HP:0000365;HP:0100258;HP:0010442;HP...,HP:0000006;HP:0003828;HP:0003812;HP:0000356;HP...,HP:0000356;HP:0000365;HP:0100258;HP:0010442;HP...,0.583333
2,10066029,Nevoid basal cell carcinoma syndrome (NBCCS) i...,HP:0002671;HP:0000006;HP:0000006;HP:0000006;HP...,biolarkgsc,HP:0002671;HP:0030731;HP:0002671;HP:0030731;HP...,HP:0002671;HP:0000006;HP:0003828;HP:0003812;HP...,HP:0002671;HP:0030731;HP:0010603;HP:0010610;HP...,0.636364
3,10196695,Angelman syndrome (AS) is a neurodevelopmental...,HP:0000707;HP:0001466,biolarkgsc,HP:0012759;HP:0030868;HP:0030868,HP:0000707;HP:0001466,HP:0012759;HP:0030868,0.0
4,10417280,Prader-Willi syndrome (PWS) and Angelman syndr...,HP:0000708;HP:0003745,biolarkgsc,HP:0012452;HP:0000708,HP:0000708;HP:0003745,HP:0012452;HP:0000708,0.5
