# Ulysses Narrative GMM Turning Points (minimal)

このノートブックは `scripts/run_report.py` と同等の処理を、Colabで実行する最小版である。


In [63]:
# ===== Cell 0: Setup (clone + install + import check) =====
import os, sys, shutil
from pathlib import Path

REPO_URL = "https://github.com/Mokafe/Mokafe-ulysses-narrative-gmm.git"
REPO_DIR = Path("/content/Mokafe-ulysses-narrative-gmm")

# 既存があれば削除してクリーンに（迷い防止）
if REPO_DIR.exists():
    shutil.rmtree(REPO_DIR)

%cd /content
!git clone {REPO_URL}
%cd {REPO_DIR}

# 依存（ログ見えるように -q なし）
!pip install -r requirements.txt

# src を import できるように
sys.path.insert(0, str(Path.cwd() / "src"))

import narrative_gmm
print("OK import narrative_gmm:", narrative_gmm.__file__)
print("CWD:", os.getcwd())
print("TOP:", sorted(os.listdir("."))[:20])


/content
Cloning into 'Mokafe-ulysses-narrative-gmm'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects: 100% (34/34), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 34 (delta 5), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (34/34), 720.23 KiB | 2.90 MiB/s, done.
Resolving deltas: 100% (5/5), done.
/content/Mokafe-ulysses-narrative-gmm
OK import narrative_gmm: /content/Mokafe-ulysses-narrative-gmm/src/narrative_gmm/__init__.py
CWD: /content/Mokafe-ulysses-narrative-gmm
TOP: ['.git', 'LICENSE', 'README.md', 'data', 'docs', 'notebooks', 'pyproject.toml', 'requirements.txt', 'scripts', 'src']


In [64]:
# ===== Cell 1: Run build_report (generate outputs/) =====
from pathlib import Path
from narrative_gmm.report import build_report, ReportConfig

# Input file; expected to be present inside the cloned repository.
INPUT_PATH = "data/sample/ulysses_fixed.json"
assert Path(INPUT_PATH).exists(), f"missing input: {INPUT_PATH}"

# Directory passed to build_report as the output location; created up front.
OUT_DIR = Path("outputs")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Report settings collected in one mapping so they are easy to adjust.
report_settings = {
    "n_components": 8,  # fine as is (2 also works if you want a lighter run)
    "seed": 0,
    "alpha": 0.05,
    "top_k": 20,
    "context_w": 3,
    "plots": True,
}
cfg = ReportConfig(**report_settings)

# Run the report and show the mapping of output paths it returns.
paths = build_report(INPUT_PATH, str(OUT_DIR), cfg)
print("build_report done. returned paths:\n", paths)


build_report done. returned paths:
 {'events_all': PosixPath('outputs/events_all.csv'), 'preds_all': PosixPath('outputs/preds_all.csv'), 'boundary_topK': PosixPath('outputs/boundary_top20.csv'), 'boundary_context': PosixPath('outputs/boundary_context.csv'), 'scene_turning_density': PosixPath('outputs/scene_turning_density.csv'), 'scene_cluster_transitions': PosixPath('outputs/scene_cluster_transitions.csv'), 'gmm_params': PosixPath('outputs/gmm_params.npz')}


In [65]:
# ===== Cell 2: List outputs =====
# Reports whether the outputs/ directory exists and prints the sorted names
# of the entries directly inside it.
from pathlib import Path

OUT_DIR = Path("outputs")
print("outputs exists:", OUT_DIR.exists())

output_names = sorted(entry.name for entry in OUT_DIR.glob("*"))
print("outputs files:", output_names)


outputs exists: True
outputs files: ['boundary_context.csv', 'boundary_top20.csv', 'events_all.csv', 'fig_cluster_timeline.png', 'fig_entropy_timeline.png', 'fig_turning_points.png', 'gmm_params.npz', 'preds_all.csv', 'scene_cluster_transitions.csv', 'scene_turning_density.csv']


In [66]:
# ===== Cell 3: Handoff check =====
# Verifies that the two CSV files needed by the next notebook exist in
# outputs/ before announcing that the handoff is ready.
from pathlib import Path

OUT_DIR = Path("outputs")

# (file name, assertion message shown when that file is missing)
REQUIRED_OUTPUTS = (
    ("preds_all.csv", "preds_all.csv がありません（build_report が完走していません）"),
    ("events_all.csv", "events_all.csv がありません（build_report が完走していません）"),
)
for required_name, missing_message in REQUIRED_OUTPUTS:
    assert (OUT_DIR / required_name).exists(), missing_message

print("OK: handoff ready -> 次は 01 ノートへ進めます")


OK: handoff ready -> 次は 01 ノートへ進めます
