In [None]:
from Libraries.B3_GetStructures import StructureAnalyzer
from typing import List, Dict, Any, Union
import json

def to_json_string(data: Union[List, Dict], pretty: bool = True) -> str:
    """Serialize a Python list or dict to a JSON string.

    Non-ASCII characters are written as-is instead of being escaped.

    Args:
        data: The list or dict to serialize.
        pretty: When truthy, indent the output by two spaces; otherwise
            emit the default single-line form.

    Returns:
        The JSON text for ``data``.
    """
    # indent=None is json.dumps' default, i.e. the non-indented layout.
    indent_width = 2 if pretty else None
    return json.dumps(data, ensure_ascii=False, indent=indent_width)

def to_jsonl_string(data: List[Dict[str, Any]]) -> str:
    """Serialize a list of dicts to a JSONL string (one JSON object per line).

    Non-ASCII characters are written as-is instead of being escaped. Lines
    are separated by ``"\\n"`` with no trailing newline; an empty list
    yields an empty string.

    Args:
        data: The records to serialize, one per output line.

    Returns:
        The JSONL text for ``data``.
    """
    encoded_lines = [json.dumps(record, ensure_ascii=False) for record in data]
    return "\n".join(encoded_lines)


# Name of the input data location passed to StructureAnalyzer as its first
# argument (presumably a directory of regulation documents; confirm against
# Libraries.B3_GetStructures).
datafolder = "HNMU_Regulations"
# NOTE(review): despite the name, this is a ".json" path derived from
# datafolder, not a folder; it is the target passed to analyzer.save_json().
outputfolder = datafolder + "_topStruct.json"

if __name__ == "__main__":
    # Pipeline driver: extract markers -> build structures -> deduplicate ->
    # select top -> save. Each intermediate result is printed for inspection.
    analyzer = StructureAnalyzer(
        datafolder, 
        verbose=True
        )

    # Step 1: extract the markers and print each one wrapped in a
    # {"MarkerType": ...} dict, one JSON object per line.
    # NOTE(review): the heading says "JSON" but the body is printed as JSONL.
    markers = analyzer.extract_markers()
    print("=== Markers (JSON) ===")
    print(to_jsonl_string([{"MarkerType": m} for m in markers]))
    print("")

    # Step 2: build structures from the extracted markers and print them
    # as JSONL.
    structures = analyzer.build_structures(markers)
    print("=== Structures (JSONL) ===")
    print(to_jsonl_string(structures))
    print("")

    # Step 3: deduplicate the structures and print the result as JSONL.
    dedup = analyzer.deduplicate(structures)
    print("=== Dedup (JSONL) ===")
    print(to_jsonl_string(dedup))
    print("")

    # Step 4: select the top entry/entries from the deduplicated structures
    # (selection criterion lives in StructureAnalyzer.select_top; the heading
    # suggests maximum depth -- TODO confirm) and pretty-print as JSON.
    top = analyzer.select_top(dedup)
    print("=== Top Max Depth (JSON) ===")
    print(to_json_string(top, pretty=True))
    print("")

    # Step 5: persist the selected result to the "<datafolder>_topStruct.json"
    # path built above.
    analyzer.save_json(top, outputfolder)

[B1] Extracted 154 markers
=== Markers (JSON) ===
{"MarkerType": "Điều 123. "}
{"MarkerType": "Chương XVI"}
{"MarkerType": "Điều 123. "}
{"MarkerType": "123. "}
{"MarkerType": "Điều 123. "}
{"MarkerType": "123. "}
{"MarkerType": "abc) "}
{"MarkerType": "123. "}
{"MarkerType": "abc) "}
{"MarkerType": "123. "}
{"MarkerType": "Điều 123. "}
{"MarkerType": "123. "}
{"MarkerType": "Điều 123. "}
{"MarkerType": "123. "}
{"MarkerType": "abc) "}
{"MarkerType": "123. "}
{"MarkerType": "abc) "}
{"MarkerType": "123. "}
{"MarkerType": "abc) "}
{"MarkerType": "123. "}
{"MarkerType": "Điều 123. "}
{"MarkerType": "123. "}
{"MarkerType": "abc) "}
{"MarkerType": "123. "}
{"MarkerType": "abc) "}
{"MarkerType": "123. "}
{"MarkerType": "abc) "}
{"MarkerType": "Chương XVI"}
{"MarkerType": "Điều 123. "}
{"MarkerType": "123. "}
{"MarkerType": "Điều 123. "}
{"MarkerType": "123. "}
{"MarkerType": "Điều 123. "}
{"MarkerType": "123. "}
{"MarkerType": "Điều 123. "}
{"MarkerType": "123. "}
{"MarkerType": "abc) "}
{"