In [4]:
import json, re
from pathlib import Path

# 필요시 경로만 바꿔주세요.
ONT_PATH = Path("/home/heodnjswns/DermAgent/dataset/Derm1M/ontology.json")

# 온톨로지 로드
with open(ONT_PATH, "r", encoding="utf-8") as f:
    ontology = json.load(f)

# 부모 맵과 정규화 맵 준비
parent = {}
nodes = set(ontology.keys())
for node, children in ontology.items():
    nodes.update(children)
    for child in children:
        parent[child] = node

_norm_re = re.compile(r"\s+")
def norm(text: str) -> str:
    return _norm_re.sub(" ", text.strip().lower())

norm_map = {}
for n in nodes:
    norm_map.setdefault(norm(n), set()).add(n)

def path_to_root(node: str):
    if node not in parent and node not in ontology:
        return []
    p = [node]
    while p[-1] in parent:
        p.append(parent[p[-1]])
    return p  # node -> ... -> root

def lookup(label: str):
    key = norm(label)
    matches = sorted(norm_map.get(key, []))
    if not matches:
        return {"input": label, "found": False, "matches": [], "depth": None, "path_root_to_node": []}
    node = matches[0]
    path = path_to_root(node)
    depth = len(path) - 1  # root = 0
    return {
        "input": label,
        "found": True,
        "matches": matches,
        "depth": depth,
        "path_root_to_node": list(reversed(path)),
    }

# 사용 예시
lookup("malignant")


{'input': 'malignant',
 'found': True,
 'matches': ['malignant'],
 'depth': 2,
 'path_root_to_node': ['root', 'proliferations', 'malignant']}

In [6]:
import pandas as pd, json
from pathlib import Path

base = Path("/home/heodnjswns/DermAgent/derm1m_exp/experiments/outputs/20251204_100651_test")

# 1) 메트릭 요약
metrics = pd.read_csv(base / "evaluation/metrics_summary.csv")
display(metrics)

# 2) 상세 메서드별 샘플 정보
detailed = pd.read_csv(base / "evaluation/detailed_analysis.csv")
print("\n[detailed_analysis 컬럼]", detailed.columns.tolist())
display(
    detailed.head()
)

# 메서드별 핵심 지표 상위 10 (존재하는 컬럼만 선택)
cols_want = ["method", "exact_match", "hierarchical_similarity", "tree_distance",
             "confidence"]
cols_exist = [c for c in cols_want if c in detailed.columns]
print("\n[메서드별 상위 10]")
display(
    detailed.sort_values("exact_match", ascending=False)
            .loc[:, cols_exist]
            .head(10)
)

# 3) per-sample 비교 (메서드별 예측/정확도)
per_sample = pd.read_csv(base / "evaluation/per_sample_comparison.csv")
print("\n[per_sample_comparison 컬럼 일부]", per_sample.columns.tolist()[:12])
print("\n[best_method 빈도]")
display(per_sample["best_method"].value_counts())

# 4) 방법 간 차이 (JSON)
with open(base / "evaluation/method_differences.json", "r", encoding="utf-8") as f:
    diff = json.load(f)

print("\n[방법 간 우위 요약]")
for k, v in diff.items():
    if isinstance(v, dict):
        print(f"{k}: {v}")
    else:
        print(f"{k}: {v}")

# 5) 예측 파일 목록 & 일부 확인
pred_dir = base / "predictions"
print("\n[예측 CSV 목록]")
for p in sorted(pred_dir.glob("*.csv")):
    print(" -", p.name)

# 예시로 첫 파일 3행만 보기
sample_pred = pred_dir / "1_baseline_labels.csv"
if sample_pred.exists():
    print("\n[1_baseline_labels.csv 앞부분]")
    display(pd.read_csv(sample_pred).head(3))


Unnamed: 0,method,exact_match,partial_match,hierarchical_f1,avg_distance,partial_credit,total_samples,valid_samples,level_1_acc,level_2_acc,level_3_acc,level_4_acc,level_5_acc,top_1_accuracy,top_3_accuracy,top_5_accuracy,top_1_h_f1,top_3_h_f1,top_5_h_f1
0,baseline_labels,0.4,0.4,0.4,0.0,0.0,5,5,0.0,0.0,0.0,0.0,0.0,,,,,,
1,baseline_no_labels,0.6,0.6,0.6,0.0,0.0,5,5,0.0,0.0,0.0,0.0,0.0,,,,,,
2,baseline_hierarchical,0.2,0.2,0.2,0.0,0.0,5,5,0.0,0.0,0.0,0.0,0.0,,,,,,
3,dermatology_agent,0.6,0.6,0.6,0.0,0.0,5,5,0.0,0.0,0.0,0.0,0.0,0.6,0.6,0.6,0.6,0.6,0.6
4,react_agent,1.0,1.0,1.0,0.0,0.0,5,5,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0



[detailed_analysis 컬럼] ['sample_id', 'filename', 'ground_truth', 'method', 'prediction', 'exact_match', 'hierarchical_similarity', 'tree_distance', 'common_ancestor', 'confidence', 'reasoning_summary']


Unnamed: 0,sample_id,filename,ground_truth,method,prediction,exact_match,hierarchical_similarity,tree_distance,common_ancestor,confidence,reasoning_summary
0,0,pubmed/fa_03_PMC6872615_JVIM-33-2770-g004_5.jpg,no definitive diagnosis,baseline_labels,no definitive diagnosis,1,1.0,-1,,1.0,The image does not depict human skin; it appea...
1,1,youtube/IXZC_NOKpq0_frame_20623_0_0.jpg,no definitive diagnosis,baseline_labels,no definitive diagnosis,1,1.0,-1,,0.8,The image shows a person undergoing a treatmen...
2,2,pubmed/5e_ea_PMC7007033_abd-94-04-0491-g03_1.jpg,no definitive diagnosis,baseline_labels,Beau's lines,0,0.0,-1,,0.9,The image shows a transverse depression across...
3,3,pubmed/42_f3_PMC8202470_IDOJ_12_454_g001_0.png,no definitive diagnosis,baseline_labels,Allergic contact dermatitis,0,0.0,-1,,0.85,The image shows erythema and mild scaling on t...
4,4,youtube/0JGlIffYRuM_frame_4049_0.jpg,no definitive diagnosis,baseline_labels,Basal cell carcinoma,0,0.0,-1,,0.9,"The image shows a pearly, pinkish lesion with ..."



[메서드별 상위 10]


Unnamed: 0,method,exact_match,hierarchical_similarity,tree_distance,confidence
0,baseline_labels,1,1.0,-1,1.0
1,baseline_labels,1,1.0,-1,0.8
7,baseline_no_labels,1,1.0,-1,0.7
6,baseline_no_labels,1,1.0,-1,0.6
5,baseline_no_labels,1,1.0,-1,1.0
23,react_agent,1,1.0,-1,0.5
15,dermatology_agent,1,1.0,-1,0.5
16,dermatology_agent,1,1.0,-1,0.5
10,baseline_hierarchical,1,1.0,-1,0.0
24,react_agent,1,1.0,-1,0.5



[per_sample_comparison 컬럼 일부] ['sample_id', 'filename', 'ground_truth', 'hierarchical_gt', 'm1_pred', 'm1_exact', 'm1_dist', 'm1_conf', 'm2_pred', 'm2_exact', 'm2_dist', 'm2_conf']

[best_method 빈도]


best_method
none    5
Name: count, dtype: int64


[방법 간 우위 요약]
all_correct_count: 1
all_wrong_count: 0
different_count: 4
different_samples: [{'sample_id': 1, 'ground_truth': 'no definitive diagnosis', 'predictions': {'baseline_labels': 'no definitive diagnosis', 'baseline_no_labels': 'no definitive diagnosis', 'baseline_hierarchical': 'Burn of skin', 'dermatology_agent': 'no definitive diagnosis', 'react_agent': 'no definitive diagnosis'}, 'correct_methods': ['baseline_labels', 'baseline_no_labels', 'dermatology_agent', 'react_agent']}, {'sample_id': 2, 'ground_truth': 'no definitive diagnosis', 'predictions': {'baseline_labels': "Beau's lines", 'baseline_no_labels': 'no definitive diagnosis', 'baseline_hierarchical': 'Leukonychia', 'dermatology_agent': 'no definitive diagnosis', 'react_agent': 'no definitive diagnosis'}, 'correct_methods': ['baseline_no_labels', 'dermatology_agent', 'react_agent']}, {'sample_id': 3, 'ground_truth': 'no definitive diagnosis', 'predictions': {'baseline_labels': 'Allergic contact dermatitis', 'baselin

Unnamed: 0,sample_id,filename,ground_truth,hierarchical_gt,prediction,confidence,reasoning,raw_response,all_predictions
0,0,pubmed/fa_03_PMC6872615_JVIM-33-2770-g004_5.jpg,no definitive diagnosis,,no definitive diagnosis,1.0,The image does not depict human skin; it appea...,"```json\n{\n ""disease_label"": ""no definitiv...",[]
1,1,youtube/IXZC_NOKpq0_frame_20623_0_0.jpg,no definitive diagnosis,no definitive diagnosis,no definitive diagnosis,0.8,The image shows a person undergoing a treatmen...,"```json\n{\n ""disease_label"": ""no definitiv...",[]
2,2,pubmed/5e_ea_PMC7007033_abd-94-04-0491-g03_1.jpg,no definitive diagnosis,,Beau's lines,0.9,The image shows a transverse depression across...,"```json\n{\n ""disease_label"": ""Beau's lines...",[]
