In [1]:
# === Korean font loader (Colab/리눅스/로컬 공통) ===
import os, urllib.request
import matplotlib.pyplot as plt
from matplotlib import font_manager
from pathlib import Path

def _try_download(url: str, dst: str) -> bool:
    try:
        urllib.request.urlretrieve(url, dst)
        return True
    except Exception as e:
        print(f"[Font] download failed: {url} -> {type(e).__name__}: {e}")
        return False

def ensure_korean_font_runtime(
    local_path="/content/NotoSansKR-Regular.ttf",
    family_name="Noto Sans KR",
):
    # 0) 이미 있으면 바로 등록
    if os.path.exists(local_path):
        print(f"[Font] using local file: {local_path}")
    else:
        # 1) 다운로드 미러들 (변경/차단 대비 다중 시도)
        mirrors = [
            # Google Noto Fonts repo (TTF, hinted)
            "https://github.com/googlefonts/noto-fonts/raw/main/hinted/ttf/NotoSansKR/NotoSansKR-Regular.ttf",
            # Google Noto Fonts repo (OTF, unhinted)
            "https://github.com/googlefonts/noto-fonts/raw/main/unhinted/otf/NotoSansKR/NotoSansKR-Regular.otf",
            # Noto CJK variable (OTF)
            "https://github.com/googlefonts/noto-cjk/raw/main/Sans/Variable/OTF/Korean/NotoSansKR-VF.otf",
            # 구글 폰트 패키지 mirror
            "https://raw.githubusercontent.com/fonts-archive/NotoSansKR/main/NotoSansKR-Regular.otf",
        ]
        Path(os.path.dirname(local_path) or ".").mkdir(parents=True, exist_ok=True)
        ok = False
        for url in mirrors:
            if _try_download(url, local_path):
                ok = True
                break

        if not ok:
            try:
                from google.colab import files
                print("[Font] 모든 미러가 실패했습니다. 로컬의 한글 폰트 파일(.ttf/.otf)을 업로드하세요.")
                uploaded = files.upload()  # 업로드 창
                # 임의 파일 하나를 local_path로 저장
                up_name = next(iter(uploaded.keys()))
                os.rename(f"/content/{up_name}", local_path)
                ok = True
            except Exception as e:
                raise RuntimeError(
                    "한글 폰트를 자동으로 받을 수 없고 업로드도 실패했습니다. "
                    "NotoSansKR-Regular.ttf 또는 NanumGothic.ttf 파일을 직접 제공해 주세요."
                ) from e

    # 3) Matplotlib에 동적 등록 + 캐시 갱신
    font_manager.fontManager.addfont(local_path)
    font_manager._load_fontmanager(try_read_cache=False)

    # 4) 전역 기본 폰트로 지정 (minus 깨짐 방지)
    plt.rcParams["font.family"] = family_name
    plt.rcParams["axes.unicode_minus"] = False
    # PDF/SVG 벡터 출력 호환성
    plt.rcParams["pdf.fonttype"] = 42
    plt.rcParams["svg.fonttype"] = "none"

    # 5) 개별 라벨/주석에도 강제할 수 있게 FontProperties 반환
    return font_manager.FontProperties(fname=local_path)

# 실행
fontprop = ensure_korean_font_runtime()
print("[Font] ready with:", fontprop.get_name())

[Font] download failed: https://github.com/googlefonts/noto-fonts/raw/main/hinted/ttf/NotoSansKR/NotoSansKR-Regular.ttf -> HTTPError: HTTP Error 404: Not Found
[Font] download failed: https://github.com/googlefonts/noto-fonts/raw/main/unhinted/otf/NotoSansKR/NotoSansKR-Regular.otf -> HTTPError: HTTP Error 404: Not Found
[Font] download failed: https://github.com/googlefonts/noto-cjk/raw/main/Sans/Variable/OTF/Korean/NotoSansKR-VF.otf -> HTTPError: HTTP Error 404: Not Found
[Font] ready with: Noto Sans KR


In [7]:
CSV_PATH  = "law_mapping_top1.csv"   # 법률 매핑 점수
# CSV_PATH = "../data/Law_Mapping_Dataset/news/파일이름.csv"
# CSV_PATH = "../data/Law_Mapping_Dataset/SNS/파일이름.csv"

XLSX_PATH = "preprocess_insta_part_1_table.xlsx"     # 클러스터 테이블 (cluster_size 포함)
# XLSX_PATH = "../data/Clustering_Dataset/news/파일이름.xlsx"
# XLSX_PATH = "../data/Clustering_Dataset/SNS/파일이름.xlsx"

OUT_CSV   = "./legal_issue_demand_scores.csv"
OUT_PNG   = "./top_issues_barh.png"

TOP_VIEW = 40  # 상위 프리뷰 개수

WEIGHTS = None

In [9]:
import sys, subprocess, matplotlib
from matplotlib import font_manager, rcParams

def ensure_korean_font():
    try:
        available = [f.name for f in font_manager.fontManager.ttflist]
        if any("NanumGothic" in n for n in available):
            rcParams["font.family"] = "NanumGothic"
            matplotlib.rcParams["axes.unicode_minus"] = False
            print("Using existing NanumGothic font.")
            return

        if "google.colab" in sys.modules:
            print("Installing fonts-nanum (sudo apt-get)...")
            subprocess.run(["sudo","apt-get","update","-y"], check=False)
            subprocess.run(["sudo","apt-get","install","-y","fonts-nanum"], check=False)
            font_manager._load_fontmanager(try_read_cache=False)
            rcParams["font.family"] = "NanumGothic"
            matplotlib.rcParams["axes.unicode_minus"] = False
            print("Installed and set NanumGothic.")
        else:
            print("Not in Colab; skipping apt-get install. Set a supported font manually if needed.")
    except Exception as e:
        print("Font setup skipped or failed:", e)

ensure_korean_font()


Installing fonts-nanum (sudo apt-get)...
Installed and set NanumGothic.


In [14]:
import os, math, json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from textwrap import shorten

def read_cluster_table(path: str) -> pd.DataFrame:
    """Excel에서 cluster_id, cluster_size 추출 (유사명 허용)."""
    df = pd.read_excel(path)
    cols = {c: c.strip() for c in df.columns}
    df.rename(columns=cols, inplace=True)
    cols_lc = {c.lower(): c for c in df.columns}
    cid_cands = ["cluster_id", "cid", "clusterid", "cluster"]
    cid_col = next((cols_lc[k] for k in cid_cands if k in cols_lc), None)
    if cid_col is None:
        raise ValueError("sample_9000_table.xlsx에서 cluster_id(또는 유사) 컬럼을 찾지 못했습니다.")
    size_cands = ["final_rank_score"]
    size_col = next((cols_lc[k] for k in size_cands if k in cols_lc), None)
    if size_col is None:
        raise ValueError("sample_9000_table.xlsx에서 final_rank_score(또는 유사) 컬럼을 찾지 못했습니다.")
    out = df[[cid_col, size_col]].copy()
    out.columns = ["cluster_id", "final_rank_score"]
    return out

def read_mapping_scores(path: str) -> pd.DataFrame:
    """CSV에서 cluster_id와 점수 컬럼(conf, nli_entail, sbert, bm25) 표준화."""
    df = pd.read_csv(path)
    cols = {c: c.strip() for c in df.columns}
    df.rename(columns=cols, inplace=True)
    cols_lc = {c.lower(): c for c in df.columns}
    cid_cands = ["cluster_id", "cid", "clusterid", "cluster"]
    cid_col = next((cols_lc[k] for k in cid_cands if k in cols_lc), None)
    if cid_col is None:
        raise ValueError("law_mapping_top1.csv에서 cluster_id(또는 유사) 컬럼을 찾지 못했습니다.")
    conf_col  = next((cols_lc[k] for k in ["conf","confidence","conf_score"] if k in cols_lc), None)
    nli_col   = next((cols_lc[k] for k in ["nli_entail","nli_entail_prob","nli","entail_prob"] if k in cols_lc), None)
    sbert_col = next((cols_lc[k] for k in ["sbert","sbert_score","sim_sbert"] if k in cols_lc), None)
    bm25_col  = next((cols_lc[k] for k in ["bm25","bm25_score"] if k in cols_lc), None)
    title_col    = next((cols_lc[k] for k in ["official_name","law","law_name","matched_law","법령명"] if k in cols_lc), None)
    query_col    = next((cols_lc[k] for k in ["query","keywords","keyword","issue_keywords","핵심키워드"] if k in cols_lc), None)
    decision_col = next((cols_lc[k] for k in ["decision","judge","label"] if k in cols_lc), None)
    need_cols = [cid_col, conf_col, nli_col, sbert_col, bm25_col]
    if any(c is None for c in need_cols):
        raise ValueError("법률 매핑 스코어 CSV에서 필요한 컬럼을 찾지 못했습니다.")
    keep = [cid_col, conf_col, nli_col, sbert_col, bm25_col]
    if title_col: keep.append(title_col)
    if query_col: keep.append(query_col)
    if decision_col: keep.append(decision_col)
    out = df[keep].copy()
    rename_map = {cid_col: 'cluster_id', conf_col: 'conf', nli_col: 'nli_entail', sbert_col: 'sbert', bm25_col: 'bm25'}
    if title_col: rename_map[title_col] = 'official_name'
    if query_col: rename_map[query_col] = 'query'
    if decision_col: rename_map[decision_col] = 'decision'
    out.rename(columns=rename_map, inplace=True)
    return out

def rank_pct(s: pd.Series) -> pd.Series:
    """분위수 랭크 [0,1]. NaN은 랭크의 중앙값으로 대체."""
    r = s.rank(pct=True, method='average')
    if r.isna().any():
        med = r.median()
        r = r.fillna(med if not np.isnan(med) else 0.5)
    return r

def normalize_scores(df: pd.DataFrame) -> pd.DataFrame:
    """점수들을 0~1 분위수로 정규화."""
    for col in ['conf','nli_entail','sbert','bm25']:
        if col in df.columns:
            med = df[col].median() if np.isfinite(df[col]).any() else 0.0
            df[col] = df[col].fillna(med)
            df[col + '_norm'] = rank_pct(df[col])
    return df

def compute_composites(df: pd.DataFrame, weights=None) -> pd.DataFrame:
    """weighted_sum, geometric_mean, conservative_min, demand_score 계산."""
    if weights is None:
        weights = {
            'final_rank_score': 0.40,
            'conf_norm':         0.20,
            'nli_entail_norm':   0.20,
            'sbert_norm':        0.10,
            'bm25_norm':         0.10,
        }
    for k in ['final_rank_score','conf_norm','nli_entail_norm','sbert_norm','bm25_norm']:
        if k not in df.columns:
            df[k] = 0.5
    ws = np.zeros(len(df))
    for k, w in weights.items():
        if k in df.columns:
            ws += w * df[k].values
    df['weighted_sum'] = ws
    gm_cols = ['final_rank_score','conf_norm','nli_entail_norm','sbert_norm','bm25_norm']
    df['geometric_mean'] = np.exp(np.log(np.clip(df[gm_cols].values, 1e-6, 1)).mean(axis=1))
    df['conservative_min'] = df[gm_cols].min(axis=1)
    df['demand_score'] = 0.6 * df['weighted_sum'] + 0.4 * df['geometric_mean']
    return df

def plot_top_bar(scored: pd.DataFrame, out_png: str, topn: int = 20, fontprop=fontprop):
    topk = scored.head(topn).copy()
    labels = []
    for i in range(len(topk)):
        if 'official_name' in topk.columns and pd.notna(topk.loc[i, 'official_name']):
            label = str(topk.loc[i, 'official_name'])
        else:
            label = f"Cluster {int(topk.loc[i, 'cluster_id'])}"
        labels.append(shorten(label, width=35, placeholder='…'))

    vals = topk['demand_score'].values[:len(labels)]
    plt.figure(figsize=(10, 6))
    plt.barh(range(len(labels)), vals)
    ax = plt.gca()
    ax.set_yticks(range(len(labels)))
    if fontprop is not None:
        ax.set_yticklabels(labels, fontproperties=fontprop)
        ax.set_xlabel('Demand score', fontproperties=fontprop)
        ax.set_title('Top Legal Issues by Estimated Demand', fontproperties=fontprop)
    else:
        ax.set_yticklabels(labels)
        ax.set_xlabel('Demand score')
        ax.set_title('Top Legal Issues by Estimated Demand')
    ax.invert_yaxis()
    plt.tight_layout()
    plt.savefig(out_png, dpi=160, bbox_inches='tight')
    plt.close()


In [15]:
# ================== (추가) 성분 기여도 및 시각화 유틸 ==================
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator

def compute_component_contributions(df: pd.DataFrame, weights: dict | None) -> pd.DataFrame:
    """
    demand_score 조합에 대한 간단한 '가중 기여도' 프록시를 계산.
    - weighted_sum = Σ w_i * x_i  (x_i는 [0,1] 정규화 점수)
    - contribution_i = w_i * x_i / Σ w_i * x_i
    """
    if weights is None:
        weights = {
            'final_rank_score': 0.40,
            'conf_norm':      0.20,
            'nli_entail_norm':0.20,
            'sbert_norm':     0.10,
            'bm25_norm':      0.10,
        }
    comp_cols = [c for c in ['final_rank_score','conf_norm','nli_entail_norm','sbert_norm','bm25_norm'] if c in df.columns]
    for c in comp_cols:
        if c not in df.columns:
            df[c] = 0.5
    wsum = np.zeros(len(df))
    for c in comp_cols:
        wsum += weights.get(c, 0.0) * df[c].values
    wsum = np.where(wsum <= 1e-12, 1e-12, wsum)

    contrib = pd.DataFrame(index=df.index)
    for c in comp_cols:
        contrib[c] = (weights.get(c, 0.0) * df[c].values) / wsum
    return contrib, comp_cols

def _short_label(row, width=30):
    if 'official_name' in row and pd.notna(row['official_name']):
        return shorten(str(row['official_name']), width=width, placeholder='…')
    return f"Cluster {int(row['cluster_id'])}"

def plot_top_heatmap(scored_df: pd.DataFrame, out_png: str, topn: int = 30,
                     weights: dict | None = None, fontprop=fontprop):
    contrib, comp_cols = compute_component_contributions(scored_df, weights)
    top = scored_df.head(topn).copy()
    contrib_top = contrib.loc[top.index, comp_cols]
    labels = [(_short_label(top.loc[i], width=30)) for i in top.index]

    fig, ax = plt.subplots(figsize=(10, 0.35*len(labels) + 2))
    im = ax.imshow(contrib_top.values, aspect='auto')
    ax.set_yticks(range(len(labels)))
    if fontprop is not None:
        ax.set_yticklabels(labels, fontproperties=fontprop)
        ax.set_xticks(range(len(comp_cols)))
        ax.set_xticklabels(comp_cols, rotation=0, fontproperties=fontprop)
        ax.set_title("Component Contributions (Top N)", fontproperties=fontprop)
    else:
        ax.set_yticklabels(labels)
        ax.set_xticks(range(len(comp_cols)))
        ax.set_xticklabels(comp_cols, rotation=0)
        ax.set_title("Component Contributions (Top N)")

    fig.tight_layout()
    fig.savefig(out_png, dpi=160, bbox_inches='tight')
    plt.close(fig)


def plot_score_scatter(scored_df: pd.DataFrame, out_png: str, x='nli_entail_norm', y='sbert_norm',
                       size_col='final_rank_score', top_annotate: int = 20,
                       fontprop=fontprop):
    df = scored_df.copy()
    xs = df.get(x, pd.Series([0.5]*len(df)))
    ys = df.get(y, pd.Series([0.5]*len(df)))
    ss = 300 * (df.get(size_col, pd.Series([0.5]*len(df))) + 0.05)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(xs, ys, s=ss, alpha=0.6)
    if fontprop is not None:
        ax.set_xlabel(x, fontproperties=fontprop)
        ax.set_ylabel(y, fontproperties=fontprop)
        ax.set_title(f"Scatter: {x} vs {y} (bubble={size_col})", fontproperties=fontprop)
    else:
        ax.set_xlabel(x); ax.set_ylabel(y); ax.set_title(f"Scatter: {x} vs {y} (bubble={size_col})")

    for i in range(min(top_annotate, len(df))):
        lbl = _short_label(df.loc[i], width=22)
        ax.annotate(lbl, (xs.iloc[i], ys.iloc[i]), xytext=(5,5), textcoords='offset points',
                    fontsize=8, fontproperties=fontprop)

    fig.tight_layout()
    fig.savefig(out_png, dpi=160, bbox_inches='tight')
    plt.close(fig)

def plot_score_distributions(scored_df: pd.DataFrame, out_dir: str):
    """
    주요 정규화 점수 분포 히스토그램(각각 단일 이미지) 저장.
    """
    metrics = [c for c in ['final_rank_score','conf_norm','nli_entail_norm','sbert_norm','bm25_norm','demand_score'] if c in scored_df.columns]
    os.makedirs(out_dir, exist_ok=True)
    for m in metrics:
        fig, ax = plt.subplots(figsize=(7,4))
        ax.hist(scored_df[m].values, bins=20)
        ax.set_title(f"Distribution of {m}")
        ax.set_xlabel(m)
        ax.set_ylabel("Count")
        fig.tight_layout()
        out_path = os.path.join(out_dir, f"dist_{m}.png")
        fig.savefig(out_path, dpi=160)
        plt.close(fig)

def safe_save_plotfile(make_plot_fn, *args, **kwargs):
    """
    make_plot_fn이 내부에서 fig.savefig(...)를 수행하는 형태일 때,
    디렉토리 생성까지 안전하게 감싸는 헬퍼.
    """
    # out path는 첫 번째 위치인자 또는 키워드 'out_png'/'out_dir'로 전달된다고 가정
    out_path = None
    if len(args) >= 2 and isinstance(args[1], str):
        out_path = args[1]
    if 'out_png' in kwargs:
        out_path = kwargs['out_png']
    if 'out_dir' in kwargs:
        out_path = kwargs['out_dir']

    if out_path:
        d = os.path.dirname(out_path)
        if d:
            os.makedirs(d, exist_ok=True)
    make_plot_fn(*args, **kwargs)

In [16]:
cluster_df = read_cluster_table(XLSX_PATH)
map_df = read_mapping_scores(CSV_PATH)
merged = pd.merge(map_df, cluster_df, on='cluster_id', how='left')
normed = normalize_scores(merged.copy())
scored = compute_composites(normed, weights=WEIGHTS)
front = [
    'cluster_id','official_name','query','decision',
    'conf_norm','nli_entail_norm','sbert_norm','bm25_norm','final_rank_score',
    'weighted_sum','geometric_mean','conservative_min','demand_score'
]
cols = [c for c in front if c in scored.columns] + [c for c in scored.columns if c not in front]
scored = scored[cols].copy().sort_values('demand_score', ascending=False).reset_index(drop=True)
scored['rank'] = np.arange(1, len(scored)+1)

import os, numpy as np

# 안전 저장 유틸
def safe_save_csv(df, path: str):
    dirpath = os.path.dirname(path)
    if dirpath:  # 빈 문자열이면 생략
        os.makedirs(dirpath, exist_ok=True)
    df.to_csv(path, index=False)
    print(f"[OK] CSV saved -> {os.path.abspath(path)}  (exists={os.path.exists(path)})")

def safe_save_plot(scored_df, path: str, topn=20):
    if not path:
        print("[INFO] OUT_PNG is empty/None. Skip plot save.")
        return
    dirpath = os.path.dirname(path)
    if dirpath:
        os.makedirs(dirpath, exist_ok=True)
    plot_top_bar(scored_df, path, topn=topn)
    print(f"[OK] PNG saved -> {os.path.abspath(path)}  (exists={os.path.exists(path)})")

# === 저장 실행 ===
# 경로가 파일명만이라면 이렇게 두는 게 가장 안전합니다.
# OUT_CSV = "legal_issue_demand_scores.csv"
# OUT_PNG = "top_issues_barh.png"

safe_save_csv(scored, OUT_CSV)
safe_save_plot(scored, OUT_PNG, topn=20)

# 확인용
print("\n[HEAD]")
display(scored.head(TOP_VIEW))

OUT_SCATTER = "./viz/scatter_nli_vs_sbert.png"
safe_save_plotfile(plot_score_scatter, scored, OUT_SCATTER,
                   x='nli_entail_norm', y='sbert_norm', size_col='final_rank_score', top_annotate=20)
print(f"[OK] Scatter saved -> {os.path.abspath(OUT_SCATTER)}  (exists={os.path.exists(OUT_SCATTER)})")

# 2) Top-N 성분 기여도 히트맵
OUT_HEATMAP = "./viz/top_components_heatmap.png"
safe_save_plotfile(plot_top_heatmap, scored, OUT_HEATMAP, topn=30, weights=WEIGHTS)
print(f"[OK] Heatmap saved -> {os.path.abspath(OUT_HEATMAP)}  (exists={os.path.exists(OUT_HEATMAP)})")

# 3) 주요 지표 분포(각각 단일 png로 저장)
OUT_DISTS_DIR = "./viz/dists"
safe_save_plotfile(plot_score_distributions, scored, out_dir=OUT_DISTS_DIR)
print(f"[OK] Dists saved in -> {os.path.abspath(OUT_DISTS_DIR)}")




[OK] CSV saved -> /content/legal_issue_demand_scores.csv  (exists=True)




[OK] PNG saved -> /content/top_issues_barh.png  (exists=True)

[HEAD]


Unnamed: 0,cluster_id,official_name,query,decision,conf_norm,nli_entail_norm,sbert_norm,bm25_norm,final_rank_score,weighted_sum,geometric_mean,conservative_min,demand_score,conf,nli_entail,sbert,bm25,rank
0,22,헌법재판소 공무원 규칙,"핵심 키워드: 공수처, 정동훈, 다녀왔다, 노상원, 수사하, 수사해야, 유출됐다며,...",자동 확정,0.764706,0.764706,0.852941,0.470588,0.78913,0.753887,0.713739,0.470588,0.737828,0.7802,0.5764,0.4108,0.0,1
1,15,방송법,"핵심 키워드: 고발인단, 성폭력, 불법정보, 여성청소년수사팀에, 제17조, 단체고발...",자동 확정,0.941176,1.0,0.176471,0.941176,0.672331,0.768933,0.637264,0.176471,0.716265,0.8135,0.8709,0.3144,3.8351,2
2,3,방송법,"핵심 키워드: 게시됩니다, 가족합창단발달장애자폐스펙트럼지적장애, 자녀들, 아임소리아...",자동 확정,0.911765,0.911765,0.735294,0.470588,0.454714,0.66718,0.665765,0.454714,0.666614,0.8092,0.6185,0.3956,0.0,3
3,21,공문서에 대한 아포스티유 및 본부영사확인서 발급에 관한 규정 시행규칙,"핵심 키워드: 까면, 복귀, 언론과, 공통점, 중요사건, 열람했다, 아침, 나라, ...",자동 확정,0.794118,0.794118,0.705882,0.470588,0.566267,0.661801,0.652879,0.470588,0.658232,0.7897,0.5793,0.3944,0.0,4
4,34,공문서에 대한 아포스티유 및 본부영사확인서 발급에 관한 규정 시행규칙,"핵심 키워드: 게시하거, 법적인, 대화, 물론이다, 심하면, 순서, 도와드리며, 2...",자동 확정,0.705882,0.705882,0.617647,0.470588,0.63365,0.644636,0.62021,0.470588,0.634866,0.753,0.506,0.3827,0.0,5
5,12,지방세특례제한법 시행령,"핵심 키워드: 수급자, 청년성장프로젝트, 견학, 성공취업, 34세, 20명, 이동,...",자동 확정,0.823529,0.823529,0.970588,0.470588,0.2728,0.582649,0.610064,0.2728,0.593615,0.793,0.5859,0.4185,0.0,6
6,14,공문서에 대한 아포스티유 및 본부영사확인서 발급에 관한 규정 시행규칙,"핵심 키워드: 적시했다, 선거인, 국민신문고, 21대, 제256조, 유도인, 여성들...",자동 확정,0.852941,0.852941,0.294118,0.470588,0.498501,0.617047,0.54971,0.294118,0.590112,0.8005,0.601,0.3223,0.0,7
7,4,공문서에 대한 아포스티유 및 본부영사확인서 발급에 관한 규정 시행규칙,"핵심 키워드: 유출, 보안, 담당업무, 악성코드, skt, 가입, 복제, 공격, w...",휴먼 검토,0.558824,0.558824,0.764706,0.470588,0.596715,0.585745,0.582493,0.470588,0.584444,0.743,0.4861,0.4081,0.0,8
8,25,공문서에 대한 아포스티유 및 본부영사확인서 발급에 관한 규정 시행규칙,"핵심 키워드: 미이용, 급여대장, 공고기간, 보려면, 전자적, 보유기간, 신고했으며...",휴먼 검토,0.617647,0.617647,1.0,0.470588,0.349856,0.53406,0.574913,0.349856,0.550401,0.7457,0.4914,0.5414,0.0,9
9,26,공문서에 대한 아포스티유 및 본부영사확인서 발급에 관한 규정 시행규칙,"핵심 키워드: 잠정, 챗봇, 틱톡, 마켓, 바이트댄스, 다운로드, 서비스에, 성능,...",자동 확정,0.882353,0.882353,0.264706,0.470588,0.366992,0.573267,0.513179,0.264706,0.549232,0.8015,0.6031,0.319,0.0,10




[OK] Scatter saved -> /content/viz/scatter_nli_vs_sbert.png  (exists=True)




[OK] Heatmap saved -> /content/viz/top_components_heatmap.png  (exists=True)




[OK] Dists saved in -> /content/viz/dists
