# Hoshimigato story-to-ML: Quickstart (Colab)

This notebook:
1) loads the dataset
2) exports the per-character *entropy peak band* cut (the "teachcut") as a CSV
3) generates plots (event annotations → x1/x2/alpha alongside entropy)
4) runs a vanilla EM/GMM on (x1,x2)


In [15]:
from pathlib import Path
import os, sys

REPO_URL = "https://github.com/Mokafe/reina-field-map.git"
REPO_DIR = Path("/content/reina-field-map")

# ★ここだけ切り替え（基本は sim2 推奨）
PROJECT_SUBDIR = "hoshimigato-shrine-sim2"   # ← sim2
# PROJECT_SUBDIR = "hoshimigato-shrine-sim"  # ← sim

# 1) clone（未cloneなら）: ★clone先を明示
if not REPO_DIR.exists():
    !git clone {REPO_URL} {REPO_DIR}

# 2) cd（プロジェクト直下へ）
PROJECT_DIR = REPO_DIR / PROJECT_SUBDIR
if not PROJECT_DIR.exists():
    raise FileNotFoundError(f"Not found: {PROJECT_DIR} (check PROJECT_SUBDIR)")

os.chdir(PROJECT_DIR)

print("CWD =", Path.cwd())
print("Has requirements.txt?", (PROJECT_DIR / "requirements.txt").exists())
print("Has src/ ?", (PROJECT_DIR / "src").exists())

# 3) import 可能化（src を確実に読む）: ★重複防止
p = str(PROJECT_DIR)
if p not in sys.path:
    sys.path.insert(0, p)

# 4) install: ★絶対パスで指定（CWDズレでも死なない）
!pip -q install -r {PROJECT_DIR/"requirements.txt"}


CWD = /content/reina-field-map/hoshimigato-shrine-sim2
Has requirements.txt? True
Has src/ ? True


In [16]:
# If running in Colab, uncomment the next 2 lines and set your repo URL.
# !git clone https://github.com/Mokafe/reina-field-map.git
# %cd reina-field-map/hoshimigato-shrine-sim

!pip -q install -r requirements.txt


In [17]:
import pandas as pd
from src.io import load_csv
from src.analysis import export_teachcut
from src.plotting import plot_character_bundle
from src.em_gmm import fit_and_plot_gmm_x1x2
from pathlib import Path

# Load the dataset CSV (path is relative to the current working directory)
# and preview the first rows.
csv_path = 'data/hoshike_all_data_en.csv'
df = load_csv(csv_path)
df.head()


Unnamed: 0,t,x1,x2,alpha,z,entropy,character,event,event_en
0,-6,9.242,1.803,3.83,0,0.708,kurokawa,研究推進,Research push
1,-6,9.28,1.875,3.759,-1,0.744,kurokawa,研究推進,Research push
2,-6,9.213,1.854,3.794,0,0.716,kurokawa,研究推進,Research push
3,-6,9.201,1.778,3.827,0,0.701,kurokawa,研究推進,Research push
4,-6,9.323,1.806,3.81,-1,0.738,kurokawa,研究推進,Research push


In [18]:
# Teachcut: keep only the 'wobble center' (entropy peak band) per character.
out = Path('outputs')
out.mkdir(exist_ok=True)

# export_teachcut returns the path of the file it wrote; read it back as CSV.
teachcut_path = export_teachcut(
    df,
    out_dir=out / 'teachcut',
    band_half_width=1,
)
teachcut = pd.read_csv(teachcut_path)

# Per-character summary of the retained `t` values: range and row count.
(
    teachcut
    .groupby('character')['t']
    .agg(['min', 'max', 'count'])
)


Unnamed: 0_level_0,min,max,count
character,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
aoi,-1,0,6
kurokawa,-2,-2,8
misaki,-2,-2,3
reina,0,0,7
takumi,-1,0,10
tome,-1,1,6


In [19]:
# Plots: event annotations → x1/x2/alpha alongside entropy,
# plus a local zoom around the entropy peak.
# The returned value is displayed as the cell output.
meta = plot_character_bundle(
    df,
    out_pdf=out / 'plots_en.pdf',
    out_fig_dir=out / 'figures',
    band_half_width=1,
)
meta


{'aoi': {'t_peak': 0.0, 'band': [-1.0, 1.0], 'n_points': 10},
 'kurokawa': {'t_peak': -2.0, 'band': [-3.0, -1.0], 'n_points': 25},
 'misaki': {'t_peak': -2.0, 'band': [-3.0, -1.0], 'n_points': 25},
 'reina': {'t_peak': 0.0, 'band': [-1.0, 1.0], 'n_points': 20},
 'takumi': {'t_peak': 0.0, 'band': [-1.0, 1.0], 'n_points': 30},
 'tome': {'t_peak': 0.0, 'band': [-1.0, 1.0], 'n_points': 10}}

In [20]:
# EM/GMM overlay on (x1, x2), written as a PNG under outputs/figures.
# NOTE(review): no random seed is passed here — confirm whether
# fit_and_plot_gmm_x1x2 seeds the fit internally so cluster ids are reproducible.
gmm_df = fit_and_plot_gmm_x1x2(
    df,
    out_path=out / 'figures' / 'gmm_overlay_x1x2.png',
    n_components=6,
)

# Most frequent (character, cluster) pairs, top 12.
gmm_df[['character', 'gmm_cluster']].value_counts().head(12)


Unnamed: 0_level_0,Unnamed: 1_level_0,count
character,gmm_cluster,Unnamed: 2_level_1
kurokawa,3,25
takumi,1,25
misaki,0,25
reina,5,13
aoi,2,10
tome,2,10
reina,4,7
takumi,2,4
takumi,4,1


### Outputs
- `outputs/plots_en.pdf`
- `outputs/figures/*`
- `outputs/teachcut/teachcut_peak_band.csv`
