In [0]:
import os, re

# Work out the repo root from the path of the notebook that is running this cell.
nb_path = (
    dbutils.notebook.entry_point.getDbutils()
    .notebook()
    .getContext()
    .notebookPath()
    .get()
)
# Keep the first four "/"-separated pieces of the notebook path; for a
# notebook stored under /Repos/<user>/<repo>/... that is /Repos/<user>/<repo>.
repo_ws_root = "/".join(nb_path.split("/")[:4])
# Same location with the /Workspace prefix: /Workspace/Repos/<user>/<repo>
repo_fs_root = "/Workspace" + repo_ws_root

# Folder (inside the repo) that receives the converted output.
# Only real ".py" files are written below it, so they can be imported.
out_ws_root = repo_ws_root + "/materialized_py"
out_fs_root = repo_fs_root + "/materialized_py"

def ensure_dir(p) -> None:
    """Create directory ``p`` together with any missing parent directories.

    A directory that already exists is not treated as an error.
    """
    os.makedirs(p, exist_ok=True)

def write_file(path, text):
    """Write ``text`` to ``path`` as UTF-8, creating missing parent directories.

    An existing file at ``path`` is overwritten.

    Args:
        path: Destination file path. A bare file name (no directory part) is
            written relative to the current working directory.
        text: String content to write.
    """
    parent = os.path.dirname(path)
    # os.path.dirname() returns "" for a bare file name and os.makedirs("")
    # raises FileNotFoundError, so only create the parent when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)

def strip_notebook_markers(text: str) -> str:
    """Return ``text`` without Databricks notebook source marker lines.

    Lines that start with ``# Databricks notebook source`` or
    ``# COMMAND ----------`` are dropped. The remaining lines are joined with
    ``"\\n"``, leading and trailing whitespace is stripped from the whole
    result, and a single trailing newline is appended.
    """
    marker_prefixes = ("# Databricks notebook source", "# COMMAND ----------")
    kept = [ln for ln in text.splitlines() if not ln.startswith(marker_prefixes)]
    body = "\n".join(kept)
    return body.strip() + "\n"

# Create an empty __init__.py in every output folder so that each one is
# recognised as a package.
for p in (
    out_fs_root + suffix
    for suffix in (
        "",
        "/base",
        "/base/detections",
        "/base/detections/binary",
        "/base/detections/behavioral",
        "/lib",
    )
):
    ensure_dir(p)
    init_path = p + "/__init__.py"
    if os.path.exists(init_path):
        # An __init__.py that is already there is left untouched.
        continue
    write_file(init_path, "")

# Copy the original rule / library sources, saving them without notebook markers.
def materialize_folder(rel_src: str) -> None:
    """Copy the ``.py`` files of one repo folder into the output tree.

    Reads from ``{repo_fs_root}/{rel_src}`` and writes to
    ``{out_fs_root}/{rel_src}``. Only the immediate entries of the source
    folder are considered, in sorted name order; names that do not end in
    ``.py`` or that start with ``_`` are skipped. Each file is passed through
    ``strip_notebook_markers`` before being written under the same file name.

    Args:
        rel_src: Folder path relative to the repo root, e.g. ``"lib"``.
    """
    source_dir = f"{repo_fs_root}/{rel_src}"
    target_dir = f"{out_fs_root}/{rel_src}"
    ensure_dir(target_dir)

    wanted = [
        name
        for name in sorted(os.listdir(source_dir))
        if name.endswith(".py") and not name.startswith("_")
    ]
    for name in wanted:
        with open(f"{source_dir}/{name}", "r", encoding="utf-8") as src:
            cleaned = strip_notebook_markers(src.read())
        write_file(f"{target_dir}/{name}", cleaned)

# Materialize the two detection rule folders, then the library folder.
materialize_folder(rel_src="base/detections/binary")
materialize_folder(rel_src="base/detections/behavioral")
materialize_folder(rel_src="lib")

# Report the workspace path of the output folder.
print("OK: materialized to", out_ws_root)

# Update rule_registry so that module_path points at the materialized copies,
# e.g. base.detections.binary.x -> materialized_py.base.detections.binary.x
# Rows that already carry the prefix are skipped, so re-running this cell does
# not prefix them a second time. The guard compares the first 16 characters
# (the length of 'materialized_py.') literally instead of using LIKE, because
# "_" is a single-character wildcard in LIKE patterns and 'materialized_py.%'
# would also match values such as 'materializedXpy.foo'.
# Rows with a NULL module_path are left unchanged, as before.
spark.sql("""
UPDATE sandbox.audit_poc.rule_registry
SET module_path = concat('materialized_py.', module_path),
    updated_at = current_timestamp()
WHERE substring(module_path, 1, 16) <> 'materialized_py.'
""")

# Show how many rules are registered in each rule group.
display(
    spark.sql(
        "SELECT rule_group, COUNT(*) cnt FROM sandbox.audit_poc.rule_registry GROUP BY rule_group ORDER BY rule_group"
    )
)
# Show the first 20 rules (ordered by group, then id) with their module path
# and callable name.
display(
    spark.sql(
        "SELECT rule_id, module_path, callable_name FROM sandbox.audit_poc.rule_registry ORDER BY rule_group, rule_id LIMIT 20"
    )
)
