In [6]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys

# Flexible lookup: search the working directory and each of its parents for
# DermAgent/eval/evaluation_metrics.py and keep the first eval directory found.
eval_path = next(
    (
        base / "DermAgent" / "eval"
        for base in (Path.cwd(), *Path.cwd().parents)
        if (base / "DermAgent" / "eval" / "evaluation_metrics.py").exists()
    ),
    None,
)
if eval_path is None:
    raise FileNotFoundError("DermAgent/eval/evaluation_metrics.py 위치를 찾을 수 없습니다.")
# Put the eval directory first on sys.path so the import below resolves to it.
sys.path.insert(0, str(eval_path))
from evaluation_metrics import HierarchicalEvaluator  # noqa: E402

# File paths
# NOTE(review): these are absolute, machine-specific paths.
root = Path("/home/work/wonjun/DermAgent/derm1m_exp/baseline/outputs/disease_label")
csv_path = root / "all_predictions_summary.csv"
ontology_path = Path("/home/work/wonjun/DermAgent/dataset/Derm1M/ontology.json")

# Load the predictions CSV and build the evaluator from the ontology file;
# `tree` is the evaluator's tree object, used for distance lookups.
df = pd.read_csv(csv_path)
evaluator = HierarchicalEvaluator(str(ontology_path))
tree = evaluator.tree

def hier_sim(gt, pred):
    """Return the evaluator's hierarchical similarity for a label pair.

    Args:
        gt: Ground-truth label.
        pred: Predicted label.

    Returns:
        ``evaluator.hierarchical_similarity(gt, pred)`` when both arguments
        are non-empty strings; ``np.nan`` otherwise (e.g. NaN, None or "").
    """
    for label in (gt, pred):
        # Anything that is not a non-empty string cannot be scored.
        if not isinstance(label, str) or not label:
            return np.nan
    return evaluator.hierarchical_similarity(gt, pred)

def hier_dist(gt, pred):
    """Return the tree's hierarchical distance for a label pair.

    Args:
        gt: Ground-truth label.
        pred: Predicted label.

    Returns:
        ``tree.get_hierarchical_distance(gt, pred)`` when both arguments are
        non-empty strings; ``np.nan`` otherwise (e.g. NaN, None or "").
    """
    for label in (gt, pred):
        # Anything that is not a non-empty string cannot be scored.
        if not isinstance(label, str) or not label:
            return np.nan
    return tree.get_hierarchical_distance(gt, pred)

# Score every row: apply each metric to the (ground truth, prediction) label pair.
df["hier_sim"] = df[["ground_truth_canonical", "predicted_canonical"]].apply(
    lambda pair: hier_sim(pair.iloc[0], pair.iloc[1]), axis=1
)
df["hier_dist"] = df[["ground_truth_canonical", "predicted_canonical"]].apply(
    lambda pair: hier_dist(pair.iloc[0], pair.iloc[1]), axis=1
)

# One row per source_file: row count, sum of valid_gt, number of non-null
# predictions, sum of exact_match, and the mean of both hierarchy scores.
summary = df.groupby("source_file").agg(
    total=("filename", "size"),
    valid_gt=("valid_gt", "sum"),
    with_pred=("predicted_canonical", "count"),
    exact_match_sum=("exact_match", "sum"),
    hier_sim_mean=("hier_sim", "mean"),
    hier_dist_mean=("hier_dist", "mean"),
)
# A zero valid_gt is replaced with NaN so the rate is NaN instead of a division by zero.
summary["exact_match_rate"] = summary["exact_match_sum"] / summary["valid_gt"].replace(0, np.nan)
summary["valid_ratio"] = summary["valid_gt"] / summary["total"]
# Best exact-match rate first.
summary = summary.sort_values("exact_match_rate", ascending=False)

print("모델별 요약 (정확도 + 온톨로지 유사도):")
print(summary)

# Ten scored rows with the highest hierarchical similarity, reduced to the
# identifying columns and both scores.
top_close = (
    df.loc[
        df["hier_sim"].notna(),
        ["source_file", "filename", "ground_truth_canonical", "predicted_canonical", "hier_sim", "hier_dist"],
    ]
    .sort_values("hier_sim", ascending=False)
    .head(10)
)
print("\n온톨로지상 가장 가까운 예측 TOP 10:")
print(top_close)

# Rows with a valid ground truth but no usable predicted label.
# pd.read_csv parses empty CSV fields as NaN by default, so comparing against ""
# alone never matches; treat both NaN and the empty string as "no prediction"
# (the same notion of "missing" that hier_sim/hier_dist use).
missing_pred = df[
    (df["valid_gt"] == 1)
    & (df["predicted_canonical"].isna() | (df["predicted_canonical"] == ""))
]
print("\n유효 GT지만 예측 라벨이 없는/매핑 안 된 사례 10개:")
print(missing_pred.head(10))


모델별 요약 (정확도 + 온톨로지 유사도):
                               total  valid_gt  with_pred  exact_match_sum  \
source_file                                                                  
gpt4o_predictions.csv            100        49         80                4   
qwen3vl32b_predictions.csv       100        49         93                3   
internvl3_14b_predictions.csv    100        49         92                2   
qwen3vl_7b_predictions.csv       100        49         75                2   
internvl3_8b_predictions.csv     100        49         94                1   

                               hier_sim_mean  hier_dist_mean  \
source_file                                                    
gpt4o_predictions.csv               0.225900        5.146341   
qwen3vl32b_predictions.csv          0.190265        5.066667   
internvl3_14b_predictions.csv       0.205238        4.933333   
qwen3vl_7b_predictions.csv          0.192017        5.117647   
internvl3_8b_predictions.csv        0.150104

In [7]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys

# Flexible lookup: search the working directory and each of its parents for
# DermAgent/eval/evaluation_metrics.py and keep the first eval directory found.
eval_path = next(
    (
        base / "DermAgent" / "eval"
        for base in (Path.cwd(), *Path.cwd().parents)
        if (base / "DermAgent" / "eval" / "evaluation_metrics.py").exists()
    ),
    None,
)
if eval_path is None:
    raise FileNotFoundError("DermAgent/eval/evaluation_metrics.py 위치를 찾을 수 없습니다.")
# Put the eval directory first on sys.path so the import below resolves to it.
sys.path.insert(0, str(eval_path))
from evaluation_metrics import HierarchicalEvaluator  # noqa: E402

# File paths
# NOTE(review): these are absolute, machine-specific paths.
root = Path("/home/work/wonjun/DermAgent/derm1m_exp/baseline/outputs/no_disease_label")
csv_path = root / "all_predictions_summary.csv"
ontology_path = Path("/home/work/wonjun/DermAgent/dataset/Derm1M/ontology.json")

# Load the predictions CSV and build the evaluator from the ontology file;
# `tree` is the evaluator's tree object, used for distance lookups.
df = pd.read_csv(csv_path)
evaluator = HierarchicalEvaluator(str(ontology_path))
tree = evaluator.tree

def hier_sim(gt, pred):
    """Return the evaluator's hierarchical similarity for a label pair.

    Args:
        gt: Ground-truth label.
        pred: Predicted label.

    Returns:
        ``evaluator.hierarchical_similarity(gt, pred)`` when both arguments
        are non-empty strings; ``np.nan`` otherwise (e.g. NaN, None or "").
    """
    for label in (gt, pred):
        # Anything that is not a non-empty string cannot be scored.
        if not isinstance(label, str) or not label:
            return np.nan
    return evaluator.hierarchical_similarity(gt, pred)

def hier_dist(gt, pred):
    """Return the tree's hierarchical distance for a label pair.

    Args:
        gt: Ground-truth label.
        pred: Predicted label.

    Returns:
        ``tree.get_hierarchical_distance(gt, pred)`` when both arguments are
        non-empty strings; ``np.nan`` otherwise (e.g. NaN, None or "").
    """
    for label in (gt, pred):
        # Anything that is not a non-empty string cannot be scored.
        if not isinstance(label, str) or not label:
            return np.nan
    return tree.get_hierarchical_distance(gt, pred)

# Score every row: apply each metric to the (ground truth, prediction) label pair.
df["hier_sim"] = df[["ground_truth_canonical", "predicted_canonical"]].apply(
    lambda pair: hier_sim(pair.iloc[0], pair.iloc[1]), axis=1
)
df["hier_dist"] = df[["ground_truth_canonical", "predicted_canonical"]].apply(
    lambda pair: hier_dist(pair.iloc[0], pair.iloc[1]), axis=1
)

# One row per source_file: row count, sum of valid_gt, number of non-null
# predictions, sum of exact_match, and the mean of both hierarchy scores.
summary = df.groupby("source_file").agg(
    total=("filename", "size"),
    valid_gt=("valid_gt", "sum"),
    with_pred=("predicted_canonical", "count"),
    exact_match_sum=("exact_match", "sum"),
    hier_sim_mean=("hier_sim", "mean"),
    hier_dist_mean=("hier_dist", "mean"),
)
# A zero valid_gt is replaced with NaN so the rate is NaN instead of a division by zero.
summary["exact_match_rate"] = summary["exact_match_sum"] / summary["valid_gt"].replace(0, np.nan)
summary["valid_ratio"] = summary["valid_gt"] / summary["total"]
# Best exact-match rate first.
summary = summary.sort_values("exact_match_rate", ascending=False)

print("모델별 요약 (정확도 + 온톨로지 유사도):")
print(summary)

# Ten scored rows with the highest hierarchical similarity, reduced to the
# identifying columns and both scores.
top_close = (
    df.loc[
        df["hier_sim"].notna(),
        ["source_file", "filename", "ground_truth_canonical", "predicted_canonical", "hier_sim", "hier_dist"],
    ]
    .sort_values("hier_sim", ascending=False)
    .head(10)
)
print("\n온톨로지상 가장 가까운 예측 TOP 10:")
print(top_close)

# Rows with a valid ground truth but no usable predicted label.
# pd.read_csv parses empty CSV fields as NaN by default, so comparing against ""
# alone never matches; treat both NaN and the empty string as "no prediction"
# (the same notion of "missing" that hier_sim/hier_dist use).
missing_pred = df[
    (df["valid_gt"] == 1)
    & (df["predicted_canonical"].isna() | (df["predicted_canonical"] == ""))
]
print("\n유효 GT지만 예측 라벨이 없는/매핑 안 된 사례 10개:")
print(missing_pred.head(10))


모델별 요약 (정확도 + 온톨로지 유사도):
                               total  valid_gt  with_pred  exact_match_sum  \
source_file                                                                  
internvl3_8b_predictions.csv     100        49         47                3   
qwen3vl32b_predictions.csv       100        49         50                3   
internvl3_14b_predictions.csv    100        49         64                2   
qwen3vl_8b_predictions.csv       100        49         42                1   

                               hier_sim_mean  hier_dist_mean  \
source_file                                                    
internvl3_8b_predictions.csv        0.311284        4.086957   
qwen3vl32b_predictions.csv          0.228388        5.384615   
internvl3_14b_predictions.csv       0.244124        5.096774   
qwen3vl_8b_predictions.csv          0.261153        4.736842   

                               exact_match_rate  valid_ratio  
source_file                                               