In [1]:
import json

def parse_bbox(text):
    try:
        box = json.loads(text)
        if isinstance(box, list) and len(box) == 4 and all(isinstance(x, (int, float)) for x in box):
            return box
    except:
        pass

    try:
        box = [float(x.strip()) for x in text.strip().split(",")]
        if len(box) == 4:
            return box
    except:
        pass

    return None

def compute_iou(box1, box2):
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    union = (box1[2] - box1[0]) * (box1[3] - box1[1]) + (box2[2] - box2[0]) * (box2[3] - box2[1]) - inter
    return inter / union if union != 0 else 0.0

# 정답 로드
with open("/home/aikusrv04/aiku/small_korean_vlm/data/korean_text_in_the_wild_ocr/text_bbox_output_final.json", "r", encoding="utf-8") as f:
    ground_truth = json.load(f)

# 예측 결과 (JSONL)
with open("/home/aikusrv04/aiku/small_korean_vlm/data/korean_text_in_the_wild_ocr/text_bbox_output_final-llava-hyperclovax-korean-ocr-culture-augmented.jsonl", "r", encoding="utf-8") as f:
    model_output = [json.loads(line) for line in f if line.strip()]

# id 매핑
gt_dict = {item["id"]: item for item in ground_truth}
pred_dict = {item["id"]: item for item in model_output}

ious = []
skipped = 0

for id_, gt_item in gt_dict.items():
    if id_ not in pred_dict:
        continue
    gt_bbox = parse_bbox(gt_item["conversations"][1]["value"])
    pred_bbox = parse_bbox(pred_dict[id_]["conversations"][1]["value"])

    if gt_bbox and pred_bbox:
        iou = compute_iou(gt_bbox, pred_bbox)
        ious.append(iou)
    else:
        skipped += 1

# 결과 출력
print(f"✅ 평가된 샘플 수: {len(ious)} / {len(gt_dict)}")
print(f"⏭️ 제외된 샘플 수 (형식 오류 등): {skipped}")
print(f"📐 평균 IoU: {sum(ious) / len(ious):.4f}" if ious else "평가 가능한 샘플이 없습니다.")
print(f"🎯 IoU ≥ 0.5 정답률: {sum(i >= 0.5 for i in ious) / len(ious) * 100:.2f}%" if ious else "")
print(f"🎯 IoU ≥ 0.75 정답률: {sum(i >= 0.75 for i in ious) / len(ious) * 100:.2f}%" if ious else "")


✅ 평가된 샘플 수: 4000 / 4000
⏭️ 제외된 샘플 수 (형식 오류 등): 0
📐 평균 IoU: 0.9942
🎯 IoU ≥ 0.5 정답률: 99.42%
🎯 IoU ≥ 0.75 정답률: 99.42%


In [1]:
import json

def parse_bbox(text):
    try:
        box = json.loads(text)
        if isinstance(box, list) and len(box) == 4 and all(isinstance(x, (int, float)) for x in box):
            return box
    except:
        pass
    try:
        box = [float(x.strip()) for x in text.strip().split(",")]
        if len(box) == 4:
            return box
    except:
        pass
    return None

def compute_iou(box1, box2):
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    union = (box1[2] - box1[0]) * (box1[3] - box1[1]) + (box2[2] - box2[0]) * (box2[3] - box2[1]) - inter
    return inter / union if union != 0 else 0.0

# Ground Truth 로딩
with open("/home/aikusrv04/aiku/small_korean_vlm/data/korean_text_in_the_wild_ocr/text_bbox_output_final.json", "r", encoding="utf-8") as f:
    gt_data = json.load(f)

# Prediction 로딩 (.jsonl 기준)
with open("/home/aikusrv04/aiku/small_korean_vlm/data/korean_text_in_the_wild_ocr/text_bbox_output_final-llava-hyperclovax-korean-ocr-culture-augmented.jsonl", "r", encoding="utf-8") as f:
    pred_data = [json.loads(line) for line in f if line.strip()]

# id로 매핑
gt_dict = {item["id"]: item for item in gt_data}
pred_dict = {item["id"]: item for item in pred_data}

ious = []
skipped = 0

for id_, gt_item in gt_dict.items():
    if id_ not in pred_dict:
        continue

    gt_bbox = parse_bbox(gt_item["conversations"][1]["value"])
    pred_bbox = parse_bbox(pred_dict[id_].get("output", ""))

    if gt_bbox and pred_bbox:
        iou = compute_iou(gt_bbox, pred_bbox)
        ious.append(iou)
    else:
        skipped += 1

# 결과 출력
print(f"✅ 평가된 샘플 수: {len(ious)} / {len(gt_dict)}")
print(f"⏭️ 제외된 샘플 수 (형식 오류 등): {skipped}")
print(f"📐 평균 IoU: {sum(ious) / len(ious):.4f}" if ious else "평가 가능한 샘플이 없습니다.")
print(f"🎯 IoU ≥ 0.5 정답률: {sum(i >= 0.5 for i in ious) / len(ious) * 100:.2f}%" if ious else "")
print(f"🎯 IoU ≥ 0.75 정답률: {sum(i >= 0.75 for i in ious) / len(ious) * 100:.2f}%" if ious else "")


✅ 평가된 샘플 수: 881 / 4000
⏭️ 제외된 샘플 수 (형식 오류 등): 3119
📐 평균 IoU: 0.0819
🎯 IoU ≥ 0.5 정답률: 3.86%
🎯 IoU ≥ 0.75 정답률: 0.57%


In [2]:
import json

def parse_bbox(text):
    try:
        box = json.loads(text)
        if isinstance(box, list) and len(box) == 4:
            return [round(float(x), 1) for x in box]
    except:
        pass
    try:
        box = [round(float(x.strip()), 1) for x in text.strip().split(",")]
        if len(box) == 4:
            return box
    except:
        pass
    return None

# 정답 로딩
with open("/home/aikusrv04/aiku/small_korean_vlm/data/korean_text_in_the_wild_ocr/text_bbox_output_final.json", "r", encoding="utf-8") as f:
    gt_data = json.load(f)

# 예측 로딩 (.jsonl)
with open("/home/aikusrv04/aiku/small_korean_vlm/data/korean_text_in_the_wild_ocr/text_bbox_output_final-llava-hyperclovax-korean-ocr-culture-augmented.jsonl", "r", encoding="utf-8") as f:
    pred_data = [json.loads(line) for line in f if line.strip()]

# id 매핑
gt_dict = {item["id"]: item for item in gt_data}
pred_dict = {item["id"]: item for item in pred_data}

matched = 0
skipped = 0

for id_, gt_item in gt_dict.items():
    if id_ not in pred_dict:
        continue

    gt_bbox = parse_bbox(gt_item["conversations"][1]["value"])
    pred_bbox = parse_bbox(pred_dict[id_].get("output", ""))

    if gt_bbox and pred_bbox:
        if gt_bbox == pred_bbox:
            matched += 1
    else:
        skipped += 1

# 결과 출력
total = len(gt_dict)
print(f"✅ 평가된 샘플 수: {total - skipped} / {total}")
print(f"⏭️ 제외된 샘플 수: {skipped}")
print(f"🎯 소수점 1자리까지 완전히 일치한 정답률: {matched / (total - skipped) * 100:.2f}%")


✅ 평가된 샘플 수: 881 / 4000
⏭️ 제외된 샘플 수: 3119
🎯 소수점 1자리까지 완전히 일치한 정답률: 2.27%


In [13]:
import json

def parse_bbox(text):
    try:
        box = json.loads(text)
        if isinstance(box, list) and len(box) == 4:
            return [float(x) for x in box]
    except:
        pass
    try:
        box = [float(x.strip()) for x in text.strip().split(",")]
        if len(box) == 4:
            return box
    except:
        pass
    return None

def compute_iou(box1, box2):
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    union = (box1[2] - box1[0]) * (box1[3] - box1[1]) + (box2[2] - box2[0]) * (box2[3] - box2[1]) - inter
    return inter / union if union != 0 else 0.0

# Load data
with open("/home/aikusrv04/aiku/small_korean_vlm/data/korean_text_in_the_wild_ocr/text_bbox_output_final.json", "r", encoding="utf-8") as f:
    gt_data = json.load(f)

with open("/home/aikusrv04/aiku/small_korean_vlm/data/korean_text_in_the_wild_ocr/text_bbox_output_final-llava-hyperclovax-korean-ocr-culture-augmented.jsonl", "r", encoding="utf-8") as f:
    pred_data = [json.loads(line) for line in f if line.strip()]

gt_dict = {item["id"]: item for item in gt_data}
pred_dict = {item["id"]: item for item in pred_data}

samples = []

for id_, gt_item in gt_dict.items():
    if id_ not in pred_dict:
        continue

    gt_bbox = parse_bbox(gt_item["conversations"][1]["value"])
    pred_bbox = parse_bbox(pred_dict[id_].get("output", ""))

    if gt_bbox and pred_bbox:
        iou = compute_iou(gt_bbox, pred_bbox)
        iou_rounded = round(iou, 1)  # ⬅ 여기서 반올림
        samples.append((id_, iou_rounded))

# Top-400 추출
top_k = sorted(samples, key=lambda x: x[1], reverse=True)[:400]
ious_top = [iou for _, iou in top_k]

# 저장
top_400_ids = [id_ for id_, _ in top_k]
with open("/home/aikusrv04/aiku/small_korean_vlm/ocr_eval/korean_text_ocr/top_400_ids2.json", "w", encoding="utf-8") as f:
    json.dump(top_400_ids, f, indent=2)

print("✅ Top-400 이미지 ID 저장 완료 (top_400_ids.json)")

# 평가 출력
print(f"🎯 IoU 상위 400개 평균: {sum(ious_top)/len(ious_top):.4f}")
print(f"🎯 IoU ≥ 0.5 정확도: {sum(i >= 0.5 for i in ious_top) / len(ious_top) * 100:.2f}%")
print(f"🎯 IoU ≥ 0.75 정확도: {sum(i >= 0.75 for i in ious_top) / len(ious_top) * 100:.2f}%")
print(f"📊 Top-400 중 최고 IoU: {max(ious_top):.4f}")
print(f"📊 Top-400 중 최저 IoU: {min(ious_top):.4f}")


✅ Top-400 이미지 ID 저장 완료 (top_400_ids.json)
🎯 IoU 상위 400개 평균: 0.1802
🎯 IoU ≥ 0.5 정확도: 11.25%
🎯 IoU ≥ 0.75 정확도: 1.25%
📊 Top-400 중 최고 IoU: 0.8000
📊 Top-400 중 최저 IoU: 0.0000


In [10]:
import json
import re

def extract_and_round_bbox(text):
    """
    문자열에서 [x1, y1, x2, y2] 좌표만 추출하고 소수점 1자리로 반올림
    """
    try:
        match = re.search(r"\[([^\]]+)\]", text)
        if match:
            nums = match.group(1).split(",")
            if len(nums) == 4:
                return [round(float(n.strip()), 1) for n in nums]
    except:
        pass
    return None

def compute_iou(box1, box2):
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    inter_area = max(0, x2 - x1) * max(0, y2 - y1)
    union_area = (box1[2] - box1[0]) * (box1[3] - box1[1]) + \
                 (box2[2] - box2[0]) * (box2[3] - box2[1]) - inter_area
    return inter_area / union_area if union_area != 0 else 0.0

# Load data
with open("/home/aikusrv04/aiku/small_korean_vlm/data/korean_text_in_the_wild_ocr/text_bbox_output_final.json", "r", encoding="utf-8") as f:
    gt_data = json.load(f)

with open("/home/aikusrv04/aiku/small_korean_vlm/data/korean_text_in_the_wild_ocr/text_bbox_output_final_kollava.jsonl", "r", encoding="utf-8") as f:
    pred_data = [json.loads(line) for line in f if line.strip()]

gt_dict = {item["id"]: item for item in gt_data}
pred_dict = {item["id"]: item for item in pred_data}

ious = []
skipped = 0

for id_, gt_item in gt_dict.items():
    if id_ not in pred_dict:
        continue

    gt_bbox = extract_and_round_bbox(gt_item["conversations"][1]["value"])
    pred_bbox = extract_and_round_bbox(pred_dict[id_].get("output", ""))

    if gt_bbox and pred_bbox:
        iou = compute_iou(gt_bbox, pred_bbox)
        ious.append(iou)
    else:
        skipped += 1

# 결과 출력
print(f"✅ 평가된 샘플 수: {len(ious)} / {len(gt_dict)}")
print(f"⏭️ 제외된 샘플 수: {skipped}")
print(f"📐 평균 IoU: {sum(ious)/len(ious):.4f}" if ious else "평가할 샘플이 없습니다.")
print(f"🎯 IoU ≥ 0.5 정답률: {sum(i >= 0.5 for i in ious) / len(ious) * 100:.2f}%" if ious else "")
print(f"🎯 IoU ≥ 0.75 정답률: {sum(i >= 0.75 for i in ious) / len(ious) * 100:.2f}%" if ious else "")


✅ 평가된 샘플 수: 3999 / 4000
⏭️ 제외된 샘플 수: 1
📐 평균 IoU: 0.0587
🎯 IoU ≥ 0.5 정답률: 4.45%
🎯 IoU ≥ 0.75 정답률: 1.45%


In [None]:
import json
import re

def extract_and_round_bbox(text):
    """
    문자열에서 [x1, y1, x2, y2] 좌표만 추출하고 소수점 1자리로 반올림
    """
    try:
        match = re.search(r"\[([^\]]+)\]", text)
        if match:
            nums = match.group(1).split(",")
            if len(nums) == 4:
                return [float(n.strip()) for n in nums]
    except:
        pass
    return None

def compute_iou(box1, box2):
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    inter_area = max(0, x2 - x1) * max(0, y2 - y1)
    union_area = (box1[2] - box1[0]) * (box1[3] - box1[1]) + \
                 (box2[2] - box2[0]) * (box2[3] - box2[1]) - inter_area
    return inter_area / union_area if union_area != 0 else 0.0

# 🔹 경로 설정
gt_path = "/home/aikusrv04/aiku/small_korean_vlm/data/korean_text_in_the_wild_ocr/text_bbox_output_final.json"
pred_path = "/home/aikusrv04/aiku/small_korean_vlm/data/korean_text_in_the_wild_ocr/text_bbox_output_final_kollava.jsonl"
topk_ids_path = "/home/aikusrv04/aiku/small_korean_vlm/ocr_eval/korean_text_ocr/top_400_ids.json"

# 🔹 데이터 로드
with open(gt_path, "r", encoding="utf-8") as f:
    gt_data = json.load(f)

with open(pred_path, "r", encoding="utf-8") as f:
    pred_data = [json.loads(line) for line in f if line.strip()]

with open(topk_ids_path, "r", encoding="utf-8") as f:
    top_ids = set(json.load(f))

# 🔹 dict로 정리
gt_dict = {item["id"]: item for item in gt_data}
pred_dict = {item["id"]: item for item in pred_data}

# 🔹 평가
ious = []
skipped = 0

for id_ in top_ids:
    gt_item = gt_dict.get(id_)
    pred_item = pred_dict.get(id_)
    if not gt_item or not pred_item:
        skipped += 1
        continue

    gt_bbox = extract_and_round_bbox(gt_item["conversations"][1]["value"])
    pred_bbox = extract_and_round_bbox(pred_item.get("output", ""))

    if gt_bbox and pred_bbox:
        iou = compute_iou(gt_bbox, pred_bbox)
        ious.append(iou)
    else:
        skipped += 1

# 🔹 결과 출력
print(f"✅ 평가된 샘플 수: {len(ious)} / {len(top_ids)}")
print(f"⏭️ 제외된 샘플 수: {skipped}")
if ious:
    print(f"📐 평균 IoU: {sum(ious)/len(ious):.4f}")
    print(f"🎯 IoU ≥ 0.5 정답률: {sum(i >= 0.5 for i in ious) / len(ious) * 100:.2f}%")
    print(f"🎯 IoU ≥ 0.75 정답률: {sum(i >= 0.75 for i in ious) / len(ious) * 100:.2f}%")
else:
    print("⚠️ 평가할 유효한 샘플이 없습니다.")


✅ 평가된 샘플 수: 399 / 400
⏭️ 제외된 샘플 수: 1
📐 평균 IoU: 0.0811
🎯 IoU ≥ 0.5 정답률: 5.26%
🎯 IoU ≥ 0.75 정답률: 1.25%
