In [1]:
from utils import KIEVisualizer

In [None]:
# Build the interactive viewer using the KIEVisualizer imported from `utils`.
# Both paths are machine-specific absolute paths; adjust them when running
# elsewhere. `margin` is presumably the blank strip (in pixels) reserved next
# to the image for labels -- confirm against the `utils` implementation.
vis = KIEVisualizer(
    margin=500,
    base_dir="/home/ian/workspace/data/datalake/processed_realkie_data",
    parquet_path="/home/ian/workspace/data/datalake/processed_realkie_data/data.parquet",
)
# Keep this call as the last expression so the widget is the cell's output.
vis.render()

interactive(children=(IntSlider(value=0, continuous_update=False, description='i', max=27967), Output()), _dom…

In [3]:
import ast, json, pandas as pd, matplotlib.pyplot as plt, matplotlib.patches as patches
from pathlib import Path
from PIL import Image
import ipywidgets as widgets
from matplotlib import font_manager  # ✅ 폰트 설정 추가

class KIEVisualizer:
    """Interactive viewer that overlays labelled bounding boxes on images.

    Each row of the parquet file is read for two columns: ``image_path``
    (absolute, or relative to ``base_dir``) and ``label`` (a dict, or a string
    holding a Python/JSON literal of one). For every labelled field the value
    box is drawn on the image and the field name is written in a margin to the
    right of the image, connected to the box by an arrow.

    Args:
        parquet_path: Path of the parquet file to load with ``pd.read_parquet``.
        base_dir: Directory that relative image paths are resolved against.
            Defaults to the directory containing ``parquet_path``.
        margin: Extra width, in image pixels, added to the right of the image
            for the field-name labels.
        font_path: Optional path to a font file used for all text (needed for
            scripts the default matplotlib font cannot render, e.g. Korean).
    """

    def __init__(self, parquet_path, base_dir=None, margin=450, font_path=None):
        self.df = pd.read_parquet(parquet_path)
        self.base = Path(base_dir or Path(parquet_path).parent)
        self.margin = margin
        self.key_c = (0.9, 0.1, 0.1)   # colour of the field-name (key) label
        self.val_c = (0.1, 0.1, 0.9)   # colour of the value box and its text

        # Font properties for text rendering; None keeps matplotlib's default.
        self.font_prop = (
            font_manager.FontProperties(fname=font_path) if font_path else None
        )

    @staticmethod
    def _loads(raw):
        """Parse a label cell into a Python object.

        Dicts and lists are returned unchanged. Anything else is tried first as
        a Python literal and then as JSON. Returns ``{}`` when neither parser
        accepts the input, including non-string input such as ``None`` or NaN.
        """
        if isinstance(raw, (dict, list)):
            return raw
        try:
            return ast.literal_eval(raw)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            pass  # not a Python literal; fall through to JSON
        try:
            return json.loads(raw)
        except (ValueError, TypeError):
            # ValueError covers JSONDecodeError; TypeError covers non-string
            # cells (None, float NaN, ...) which json.loads refuses outright.
            return {}

    @staticmethod
    def _valid_bbox(bb):
        """Return True when ``bb`` has exactly four entries, not all zero/falsy."""
        try:
            return bb is not None and len(bb) == 4 and any(bb)
        except TypeError:
            # Unsized or non-iterable value (e.g. a bare number).
            return False

    def _triples(self, d):
        """Yield ``(field_name, value, bbox)`` for every drawable entry of ``d``.

        Entries named ``"Unknown"``, empty entries, non-dict items and items
        without a usable four-element bbox are skipped. A non-dict ``d`` yields
        nothing.
        """
        if not isinstance(d, dict):
            return
        for cls_name, vs in d.items():
            if not vs or cls_name == "Unknown":
                continue
            if isinstance(vs, dict):
                vs = [vs]  # a single entry is treated as a one-item list
            for v in vs:
                if not isinstance(v, dict):
                    continue
                # Both the special-token and the plain key spellings are accepted.
                bb = v.get("<|bbox|>", v.get("bbox"))
                if self._valid_bbox(bb):
                    yield cls_name, v.get("<|value|>", v.get("value", "")), bb

    def _img_path(self, rel):
        """Return ``rel`` unchanged if absolute, else resolved under ``self.base``."""
        p = Path(rel)
        return p if p.is_absolute() else (self.base / p).resolve()

    def _draw(self, idx):
        """Render row ``idx``: the image, its value boxes and field-name labels."""
        row = self.df.iloc[idx]

        # Image.open keeps the file open until closed; the context manager
        # releases the handle once imshow has read the pixel data, so moving
        # the slider repeatedly does not accumulate open files.
        with Image.open(self._img_path(row["image_path"])) as img:
            W, H = img.size
            fig, ax = plt.subplots(figsize=(10, 10 * H / (W + self.margin)))
            ax.imshow(img)

        ax.set_xlim(0, W + self.margin)
        ax.set_ylim(H, 0)  # inverted y-axis: image origin is the top-left corner
        ax.set_facecolor('white')

        label_dict = self._loads(row["label"])
        # Some labels are wrapped in a top-level "post_office" dict; unwrap it.
        if isinstance(label_dict, dict) and "post_office" in label_dict:
            label_dict = label_dict["post_office"]

        for k, v, bb in self._triples(label_dict):
            # bbox coordinates are multiplied by the image size, i.e. they are
            # treated as fractions of the width/height.
            x1, y1, x2, y2 = bb[0]*W, bb[1]*H, bb[2]*W, bb[3]*H
            ax.add_patch(
                patches.Rectangle((x1, y1), x2-x1, y2-y1,
                                  lw=2, ec=self.val_c, fc='none')
            )
            if v:
                # Value text just above the box (font_prop enables e.g. Korean).
                ax.text(x1, y1-3, str(v), color='white', fontsize=8,
                        fontproperties=self.font_prop,
                        bbox=dict(boxstyle='round,pad=0.2',
                                  fc=self.val_c, ec=self.val_c, alpha=0.7))
            # Field name in the right-hand margin, level with the box centre.
            tx = W + 20
            txt = ax.text(tx, (y1+y2)/2, k, color='white', fontsize=9,
                          fontproperties=self.font_prop,
                          bbox=dict(boxstyle='round,pad=0.3',
                                    fc=self.key_c, ec=self.key_c))
            # Convert the label's on-screen corner back to data coordinates so
            # the arrow starts at the label and points at the box centre.
            bb_key = txt.get_window_extent(fig.canvas.get_renderer())
            kx, ky = ax.transData.inverted().transform((bb_key.x0, bb_key.y0))
            ax.annotate('', xy=((x1+x2)/2, (y1+y2)/2), xytext=(kx, ky),
                        arrowprops=dict(arrowstyle='-|>', color=self.val_c))
        ax.axis('off')
        plt.show()

    def render(self):
        """Show a slider over the row indices that redraws on release.

        Returns whatever ``widgets.interact`` returns.
        """
        slider = widgets.IntSlider(
            value=0,
            min=0,
            max=len(self.df)-1,
            step=1,
            continuous_update=False,  # redraw only when the slider is released
            description='index'
        )
        return widgets.interact(lambda idx: self._draw(idx), idx=slider)

In [3]:
# NOTE: run the cell that defines KIEVisualizer in this notebook before this
# one. The class imported from `utils` at the top does not accept `font_path`,
# and this call fails with a TypeError if that version is still bound.
viz = KIEVisualizer(
    parquet_path="/home/ian/workspace/data/datalake/postoffice_labeling/parquet/data.parquet",
    font_path="/home/ian/workspace/data/datalake/NotoSansKR-Medium.ttf",  # be sure to set this: font used for Korean text
    base_dir="/home/ian/workspace/data/datalake/postoffice_labeling",
)
# Keep this call as the last expression so the widget is the cell's output.
viz.render()

TypeError: KIEVisualizer.__init__() got an unexpected keyword argument 'font_path'