# 10 Final Teacher-Guided Semantic Basis Projection Report

This notebook builds the final report suite from existing project artifacts.
Outputs:
- `outputs/final_report/tables/*.csv`
- `outputs/final_report/figures/*.png`
- `docs/FINAL_REPORT.md`
- `docs/OTHER_EXPERIMENTS.md`


In [None]:
from __future__ import annotations

from pathlib import Path
import textwrap

import numpy as np
import pandas as pd

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.patches import FancyBboxPatch
import matplotlib.image as mpimg

# Notebook-wide constants.
SEED = 42
MA_WINDOW = 5  # moving-average window; used below to label MA(...) metrics in figures
EPS = 1e-8

# All locations are derived from the current working directory, so the notebook
# must be executed from the project root for the relative layout to resolve.
PROJECT_ROOT = Path('.').resolve()
OUTPUTS_ROOT = PROJECT_ROOT / 'outputs'
DOCS_ROOT = PROJECT_ROOT / 'docs'

# Destination folders for everything this notebook writes.
FINAL_OUT = OUTPUTS_ROOT / 'final_report'
FINAL_FIG = FINAL_OUT / 'figures'
FINAL_TAB = FINAL_OUT / 'tables'
# Create the output folders up front; existing folders are left untouched.
for d in [FINAL_OUT, FINAL_FIG, FINAL_TAB, DOCS_ROOT]:
    d.mkdir(parents=True, exist_ok=True)

# Read-only locations of artifacts that are consumed (not produced) here.
SRC_VARIANT_TAB = OUTPUTS_ROOT / 'excitement_variant_analysis' / 'tables'
SRC_VARIANT_FIG = OUTPUTS_ROOT / 'excitement_variant_analysis' / 'figures'
SRC_CLUSTER_TAB = OUTPUTS_ROOT / 'excitement_indep_clustering' / 'tables'
SRC_CLUSTER_FIG = OUTPUTS_ROOT / 'excitement_indep_clustering' / 'figures'
SRC_LINEAR_TAB = OUTPUTS_ROOT / 'excitement_linear' / 'tables'

print('PROJECT_ROOT:', PROJECT_ROOT)
print('FINAL_OUT:', FINAL_OUT)


In [None]:
# Load source tables and build summary tables for report claims
# Logical name -> file path for every upstream artifact this cell reads.
required_sources = {
    'variant_global_metrics': SRC_VARIANT_TAB / 'model_global_metrics_by_variant.csv',
    'variant_per_novel_metrics': SRC_VARIANT_TAB / 'model_per_novel_metrics_by_variant.csv',
    'variant_pairwise': SRC_VARIANT_TAB / 'variant_pairwise_agreement_global.csv',
    'cluster_quality': SRC_CLUSTER_TAB / 'cluster_quality_by_method.csv',
    'cluster_profile': SRC_CLUSTER_TAB / 'cluster_profile_summary.csv',
    'cluster_representatives': SRC_CLUSTER_TAB / 'cluster_representatives.csv',
    'cluster_genre_prop': SRC_CLUSTER_TAB / 'genre_by_feature_cluster_proportions.csv',
    'cluster_assign_feature': SRC_CLUSTER_TAB / 'cluster_assignments_feature.csv',
    'cluster_agreement': SRC_CLUSTER_TAB / 'cluster_method_agreement.csv',
    'split_manifest': SRC_LINEAR_TAB / 'split_manifest.csv',
    'metadata': PROJECT_ROOT / 'data' / 'metadata.csv',
    'ciw5_model_npz': OUTPUTS_ROOT / 'excitement_variant_analysis' / 'model' / 'linear_weights_indep_winsize_5.npz',
}

# Record one existence check per source. The loop raises on the first missing
# file, so when this cell completes every recorded check has pass=True.
source_checks = []
for name, path in required_sources.items():
    exists = bool(path.exists())
    source_checks.append({
        'check': f'source_exists::{name}',
        'expected': True,
        'actual': exists,
        'pass': exists,
    })
    if not exists:
        raise FileNotFoundError(f'Missing required source: {path}')

variant_df = pd.read_csv(required_sources['variant_global_metrics'])
variant_per_book_df = pd.read_csv(required_sources['variant_per_novel_metrics'])
pairwise_df = pd.read_csv(required_sources['variant_pairwise'])
cluster_quality_df = pd.read_csv(required_sources['cluster_quality'])
cluster_profile_df = pd.read_csv(required_sources['cluster_profile'])
cluster_rep_df = pd.read_csv(required_sources['cluster_representatives'])
cluster_genre_prop_df = pd.read_csv(required_sources['cluster_genre_prop'])
cluster_assign_df = pd.read_csv(required_sources['cluster_assign_feature'])
cluster_agree_df = pd.read_csv(required_sources['cluster_agreement'])
split_manifest_df = pd.read_csv(required_sources['split_manifest'])
metadata_df = pd.read_csv(required_sources['metadata'])
# NOTE(review): allow_pickle=True lets numpy unpickle arbitrary objects stored in
# the archive; that is only safe for trusted, locally produced files. Confirm
# whether the archive really contains object arrays; if not, drop the flag.
# The returned archive object is never closed explicitly in this cell.
ciw5_model_npz = np.load(required_sources['ciw5_model_npz'], allow_pickle=True)

# Raw variant identifier -> (short report code, descriptive name).
variant_name_map = {
    'base': ('NC-1', 'No-Context Chunk Teacher Labels'),
    'winsize_5': ('SW-5', 'Shared-Window Labels'),
    'indep_winsize_5': ('CIW-5', 'Context-Window Independent Labels'),
}

# Dataset profile table for report reproducibility
# Attach the primary genre from the metadata table to every book in the split
# manifest. A left join keeps books that have no metadata row; their genre is
# filled with 'Unknown' below.
meta_cols = ['id', 'genre_primary']
dataset_profile_df = (
    split_manifest_df
    .merge(metadata_df[meta_cols], left_on='book_id', right_on='id', how='left')
    .drop(columns=['id'])  # 'id' duplicates 'book_id' after the join
)
dataset_profile_df['genre_primary'] = dataset_profile_df['genre_primary'].fillna('Unknown')
# Fix the column order and row order so the written CSV is stable between runs.
dataset_profile_df = dataset_profile_df[['book_id', 'title', 'processed_dir', 'genre_primary', 'T', 'split']].sort_values(
    ['split', 'book_id']
).reset_index(drop=True)
dataset_profile_df.to_csv(FINAL_TAB / 'dataset_profile_for_report.csv', index=False)

# Variant ranking diagnostics (trend-first is the locked selector)
# Work on a copy and attach the short code / descriptive name for each raw
# variant id. A variant id that is absent from variant_name_map raises KeyError.
variant_summary = variant_df.copy()
variant_summary['variant_code'] = variant_summary['variant'].map(lambda v: variant_name_map[v][0])
variant_summary['variant_name'] = variant_summary['variant'].map(lambda v: variant_name_map[v][1])


def assign_rank(df: pd.DataFrame, split_col: str, sort_cols: list[str], ascending: list[bool]) -> pd.Series:
    """Rank rows 1..n inside each ``split_col`` group.

    Within every group the rows are ordered by ``sort_cols`` (lexicographically,
    using the matching ``ascending`` flags) and numbered from 1 in that order.

    Args:
        df: Table to rank; its index is reused for the result.
        split_col: Column whose distinct values define the ranking groups.
        sort_cols: Columns that define the order inside a group, most
            significant first.
        ascending: One flag per entry of ``sort_cols``.

    Returns:
        Integer Series aligned to ``df.index`` holding the per-group rank.

    Raises:
        ValueError: If some row did not receive a rank (for example because its
            ``split_col`` value is null and was therefore skipped by groupby).
    """
    # Start from NaN floats so an unranked row is detectable, instead of
    # depending on how pandas fills an int64 Series constructed without data.
    ranks = pd.Series(np.nan, index=df.index, dtype='float64')
    for _, members in df.groupby(split_col):
        ordered = members.sort_values(sort_cols, ascending=ascending, kind='mergesort')
        ranks.loc[ordered.index] = np.arange(1, len(ordered) + 1)
    if ranks.isna().any():
        raise ValueError(f'assign_rank: rows without a rank; check for null values in {split_col!r}')
    return ranks.astype(int)


# Three alternative per-split rankings of the variants. Each one sorts on its
# primary metric first and falls back to the later columns to break ties;
# 'variant_code' is the final tie-breaker so the ranks are deterministic.
variant_summary['rank_trend_primary'] = assign_rank(
    variant_summary,
    'split',
    ['mae_ma', 'mae', 'rmse', 'variant_code'],
    [True, True, True, True],
)
variant_summary['rank_raw_error'] = assign_rank(
    variant_summary,
    'split',
    ['mae', 'rmse', 'mae_ma', 'variant_code'],
    [True, True, True, True],
)
# Correlation-first ranking: higher corr / r2 is better, hence descending.
variant_summary['rank_corr'] = assign_rank(
    variant_summary,
    'split',
    ['corr', 'r2', 'mae_ma', 'variant_code'],
    [False, False, True, True],
)

# Compact table for the report body; 'mae_ma' is exported as 'mae_ma5'.
variant_selection_summary = variant_summary[
    ['variant_code', 'variant_name', 'split', 'rmse', 'mae', 'mae_ma', 'r2', 'corr', 'rank_trend_primary']
].rename(columns={'mae_ma': 'mae_ma5'})
variant_selection_summary.to_csv(FINAL_TAB / 'variant_selection_summary.csv', index=False)

# Diagnostics table: same metrics plus all three ranks, ordered by split and
# trend-first rank.
variant_selection_diagnostics = variant_summary[
    ['variant_code', 'split', 'rmse', 'mae', 'mae_ma', 'r2', 'corr', 'rank_trend_primary', 'rank_raw_error', 'rank_corr']
].rename(columns={'mae_ma': 'mae_ma5'}).sort_values(['split', 'rank_trend_primary', 'variant_code']).reset_index(drop=True)
variant_selection_diagnostics.to_csv(FINAL_TAB / 'variant_selection_diagnostics.csv', index=False)

# Selected variant by locked criterion on test split
# The first row after sorting on the trend-first rank is the selected variant.
# .iloc[0] raises IndexError if the diagnostics table has no 'test' rows.
variant_test_rank = variant_selection_diagnostics[variant_selection_diagnostics['split'] == 'test'].sort_values(
    ['rank_trend_primary', 'mae_ma5', 'mae', 'rmse', 'variant_code'],
    ascending=[True, True, True, True, True],
    kind='mergesort',
)
selected_variant_code = str(variant_test_rank.iloc[0]['variant_code'])

# Per-book CIW-5 deep-dive table
# Positive 'error_gap_raw_vs_ma' means the raw MAE is larger than the
# moving-average MAE for that book.
ciw5_deepdive_df = variant_per_book_df[variant_per_book_df['variant'] == 'indep_winsize_5'].copy()
ciw5_deepdive_df['error_gap_raw_vs_ma'] = ciw5_deepdive_df['mae'] - ciw5_deepdive_df['mae_ma']
ciw5_deepdive_df = ciw5_deepdive_df[
    ['book_id', 'title', 'split', 'T', 'mse', 'rmse', 'mae', 'mae_ma', 'corr', 'r2', 'error_gap_raw_vs_ma']
].sort_values(['split', 'book_id']).reset_index(drop=True)
ciw5_deepdive_df.to_csv(FINAL_TAB / 'ciw5_per_book_deepdive.csv', index=False)

# Determine selected clustering settings from quality tables (data-driven)
# Feature branch: prefer the highest silhouette, then the highest k-means
# stability, then the lowest Davies-Bouldin, then the smallest k, then method
# name. Missing stability / Davies-Bouldin values are replaced by the worst
# possible value so they lose every tie-break.
feature_quality = cluster_quality_df[cluster_quality_df['branch'] == 'feature'].copy()
feature_quality['_stability'] = feature_quality['kmeans_stability_ari'].fillna(-np.inf)
feature_quality['_db'] = feature_quality['davies_bouldin'].fillna(np.inf)
selected_feature_quality = feature_quality.sort_values(
    ['silhouette', '_stability', '_db', 'k', 'method'],
    ascending=[False, False, True, True, True],
    kind='mergesort',
).iloc[0]
selected_feature_method = str(selected_feature_quality['method'])
selected_feature_k = int(selected_feature_quality['k'])

# Restrict the assignments to the selected (method, k) when the table carries
# those columns. If the filter matches nothing, the unfiltered table is kept
# silently.
selected_assign_df = cluster_assign_df.copy()
if {'method', 'k'}.issubset(selected_assign_df.columns):
    filt = selected_assign_df[
        (selected_assign_df['method'].astype(str) == selected_feature_method)
        & (selected_assign_df['k'].astype(int) == selected_feature_k)
    ]
    if not filt.empty:
        selected_assign_df = filt.copy()

# DTW branch: highest silhouette wins, ties go to the smaller k.
selected_dtw_quality = cluster_quality_df[cluster_quality_df['branch'] == 'dtw'].sort_values(
    ['silhouette', 'k'], ascending=[False, True], kind='mergesort'
).iloc[0]
selected_dtw_k = int(selected_dtw_quality['k'])

# Cluster summary table for report
# One row per feature cluster: size, up to three top-ranked features, the
# centroid-medoid representative book and the genre with the largest proportion.
feature_profile = cluster_profile_df[cluster_profile_df['branch'] == 'feature'].copy()
feature_top3 = feature_profile[feature_profile['rank_abs_delta'] <= 3].sort_values(['cluster', 'rank_abs_delta'])

rep_centroid = cluster_rep_df[
    (cluster_rep_df['branch'] == 'feature') & (cluster_rep_df['role'] == 'centroid_medoid')
].copy()

# Every column except 'cluster' is treated as a genre proportion column. This
# does not depend on the cluster, so it is computed once instead of per iteration.
genre_cols = [c for c in cluster_genre_prop_df.columns if c != 'cluster']

cluster_sizes = selected_assign_df['cluster'].value_counts().sort_index()
cluster_rows = []
for cluster_id in sorted(cluster_sizes.index.tolist()):
    sub = feature_top3[feature_top3['cluster'] == cluster_id].sort_values('rank_abs_delta')
    feats = sub['feature'].tolist()[:3]
    # Pad with empty strings so the three feature columns always exist.
    feats = feats + [''] * (3 - len(feats))

    rep_sub = rep_centroid[rep_centroid['cluster'] == cluster_id]
    if rep_sub.empty:
        rep_book = 'N/A'
    else:
        rep_row = rep_sub.iloc[0]
        rep_book = f"{int(rep_row['book_id'])} | {rep_row['title']}"

    genre_row = cluster_genre_prop_df[cluster_genre_prop_df['cluster'] == cluster_id]
    # No row for this cluster, or no genre columns at all: report 'Unknown'
    # rather than failing on max() of an empty sequence.
    if genre_row.empty or not genre_cols:
        dominant_genre = 'Unknown'
        dominant_prop = float('nan')
    else:
        genre_row = genre_row.iloc[0]
        dominant_genre = max(genre_cols, key=lambda c: float(genre_row[c]))
        dominant_prop = float(genre_row[dominant_genre])

    cluster_rows.append({
        'cluster': int(cluster_id),
        'n_books': int(cluster_sizes.loc[cluster_id]),
        'top_feature_1': feats[0],
        'top_feature_2': feats[1],
        'top_feature_3': feats[2],
        'representative_book': rep_book,
        'dominant_genre': dominant_genre,
        'dominant_genre_prop': dominant_prop,
    })

cluster_summary_df = pd.DataFrame(cluster_rows).sort_values('cluster').reset_index(drop=True)
cluster_summary_df.to_csv(FINAL_TAB / 'cluster_summary_for_report.csv', index=False)

# Key results registry
# Accumulates one dict per registered metric; turned into a DataFrame later.
key_rows = []


def add_metric(metric_key: str, value, source_file: str, source_row_filter: str, notes: str = ''):
    """Append one traceable metric record to the module-level ``key_rows`` list.

    Args:
        metric_key: Identifier under which the value is registered.
        value: The metric value itself (number or string).
        source_file: Path of the file the value was taken from.
        source_row_filter: Description of how the value is located in that file.
        notes: Optional free-text remark.
    """
    record = dict(
        metric_key=metric_key,
        value=value,
        source_file=source_file,
        source_row_filter=source_row_filter,
        notes=notes,
    )
    key_rows.append(record)


# Populate the key results registry. Source paths and row filters that are
# shared by several entries are built once; metrics that follow the same
# pattern are registered in loops. Registration order, keys, values, source
# files and filters are the same as with one explicit add_metric call per entry.

# Dataset and split metrics
_profile_src = str(FINAL_TAB / 'dataset_profile_for_report.csv')
add_metric('corpus::n_books', int(dataset_profile_df['book_id'].nunique()), _profile_src, 'unique(book_id)')
add_metric('corpus::n_chunks_total', int(dataset_profile_df['T'].sum()), _profile_src, 'sum(T)')
add_metric('corpus::train_novels', int((dataset_profile_df['split'] == 'train').sum()), _profile_src, 'split=train,count_rows')
add_metric('corpus::test_novels', int((dataset_profile_df['split'] == 'test').sum()), _profile_src, 'split=test,count_rows')
add_metric('corpus::min_T', int(dataset_profile_df['T'].min()), _profile_src, 'min(T)')
add_metric('corpus::max_T', int(dataset_profile_df['T'].max()), _profile_src, 'max(T)')

# Optimization settings (CIW-5 student)
# Each setting is read as the first element of the flattened stored array.
_model_src = str(required_sources['ciw5_model_npz'])
for _setting, _cast in (
    ('seed', int),
    ('lr', float),
    ('epochs', int),
    ('batch_size', int),
    ('weight_decay', float),
):
    add_metric(
        f'ciw5::train::{_setting}',
        _cast(np.array(ciw5_model_npz[_setting]).reshape(-1)[0]),
        _model_src,
        f'{_setting}[0]',
    )

# Variant metrics and ranking diagnostics
_variant_src = str(required_sources['variant_global_metrics'])
_diag_src = str(FINAL_TAB / 'variant_selection_diagnostics.csv')
for _, row in variant_selection_diagnostics.iterrows():
    key = str(row['variant_code'])
    split = str(row['split'])
    _row_filter = f'variant_code={key},split={split}'
    # Error / fit metrics are attributed to the upstream metrics table ...
    for _metric_name in ('rmse', 'mae', 'mae_ma5', 'r2', 'corr'):
        add_metric(f'variant::{key}::{split}::{_metric_name}', float(row[_metric_name]), _variant_src, _row_filter)
    # ... while ranks are attributed to the diagnostics table written by this notebook.
    for _metric_name in ('rank_trend_primary', 'rank_raw_error', 'rank_corr'):
        add_metric(f'variant::{key}::{split}::{_metric_name}', int(row[_metric_name]), _diag_src, _row_filter)

# Raw-vs-smoothed MAE difference for CIW-5 on the test split. The row is looked
# up once; .iloc[0] raises IndexError if no such row exists.
_ciw5_test_rows = variant_selection_diagnostics[
    (variant_selection_diagnostics['variant_code'] == 'CIW-5')
    & (variant_selection_diagnostics['split'] == 'test')
]
add_metric(
    'variant::CIW-5::test::mae_drop_raw_to_ma5',
    float(_ciw5_test_rows['mae'].iloc[0] - _ciw5_test_rows['mae_ma5'].iloc[0]),
    _diag_src,
    'variant_code=CIW-5,split=test,mae-mae_ma5',
)

add_metric('selected_variant_code', selected_variant_code, _diag_src, 'split=test,rank_trend_primary=1', 'Trend-fidelity-first criterion')

# Pairwise label agreement (teacher side diagnostics)
_pairwise_src = str(required_sources['variant_pairwise'])
for _, row in pairwise_df.iterrows():
    a = str(row['variant_a'])
    b = str(row['variant_b'])
    tag = f'{a}__vs__{b}'
    for _metric_name in ('mae', 'exact_match', 'corr'):
        add_metric(f'teacher_pairwise::{tag}::{_metric_name}', float(row[_metric_name]), _pairwise_src, f'variant_a={a},variant_b={b}')

# Clustering metrics
_quality_src = str(required_sources['cluster_quality'])
_feature_filter = f'branch=feature,method={selected_feature_method},k={selected_feature_k}'
add_metric('cluster::feature::selected_method', selected_feature_method, _quality_src, _feature_filter)
add_metric('cluster::feature::selected_k', selected_feature_k, _quality_src, _feature_filter)
for _metric_name in ('silhouette', 'davies_bouldin', 'calinski_harabasz'):
    add_metric(f'cluster::feature::{_metric_name}', float(selected_feature_quality[_metric_name]), _quality_src, _feature_filter)
# The stability score may be missing for the selected row; register NaN in that case.
add_metric(
    'cluster::feature::kmeans_stability_ari',
    float(selected_feature_quality['kmeans_stability_ari']) if not pd.isna(selected_feature_quality['kmeans_stability_ari']) else np.nan,
    _quality_src,
    _feature_filter,
)

_dtw_filter = f"branch=dtw,method={selected_dtw_quality['method']},k={selected_dtw_k}"
add_metric('cluster::dtw::selected_k', selected_dtw_k, _quality_src, _dtw_filter)
add_metric('cluster::dtw::silhouette', float(selected_dtw_quality['silhouette']), _quality_src, _dtw_filter)

# Agreement between clusterings: the first 'metric' row named 'ari' / 'nmi'.
_agreement_rows = cluster_agree_df[cluster_agree_df['row_type'] == 'metric']
_agreement_src = str(required_sources['cluster_agreement'])
ari = float(_agreement_rows[_agreement_rows['metric'] == 'ari']['value'].iloc[0])
nmi = float(_agreement_rows[_agreement_rows['metric'] == 'nmi']['value'].iloc[0])
add_metric('cluster::agreement::ari', ari, _agreement_src, 'row_type=metric,metric=ari')
add_metric('cluster::agreement::nmi', nmi, _agreement_src, 'row_type=metric,metric=nmi')

# CIW-5 per-book metrics in registry for traceability
# (source column, registry suffix): 'mae_ma' is registered under 'mae_ma5'.
_deepdive_src = str(FINAL_TAB / 'ciw5_per_book_deepdive.csv')
for _, row in ciw5_deepdive_df.iterrows():
    bid = int(row['book_id'])
    split = str(row['split'])
    for _column, _metric_name in (
        ('mae', 'mae'),
        ('mae_ma', 'mae_ma5'),
        ('corr', 'corr'),
        ('error_gap_raw_vs_ma', 'error_gap_raw_vs_ma'),
    ):
        add_metric(f'ciw5::book::{bid}::{split}::{_metric_name}', float(row[_column]), _deepdive_src, f'book_id={bid},split={split}')

key_registry_df = pd.DataFrame(key_rows)
key_registry_df.to_csv(FINAL_TAB / 'key_results_registry.csv', index=False)

# Method claims checklist mapped to evidence
# Each claim lists its evidence in 'metric_key_or_source' as a ';'-separated
# list. An entry is either a registry metric key or 'path:<file>' pointing at
# an artifact on disk.
claim_rows = [
    {
        'claim_id': 'CLM01',
        'claim_text': 'The corpus contains 20 books with a deterministic novel-level split.',
        'metric_key_or_source': 'corpus::n_books;corpus::train_novels;corpus::test_novels',
    },
    {
        'claim_id': 'CLM02',
        'claim_text': 'The student model is trained with fixed optimization settings and L2 regularization.',
        'metric_key_or_source': 'ciw5::train::seed;ciw5::train::lr;ciw5::train::epochs;ciw5::train::batch_size;ciw5::train::weight_decay',
    },
    {
        'claim_id': 'CLM03',
        'claim_text': 'CIW-5 is selected by trend-fidelity-first criterion on the test split.',
        'metric_key_or_source': 'selected_variant_code;variant::CIW-5::test::rank_trend_primary',
    },
    {
        'claim_id': 'CLM04',
        'claim_text': 'CIW-5 has the strongest MA(5) trend-level error profile on test data.',
        'metric_key_or_source': 'variant::CIW-5::test::mae_ma5;variant::NC-1::test::mae_ma5;variant::SW-5::test::mae_ma5',
    },
    {
        'claim_id': 'CLM05',
        'claim_text': 'CIW-5 reduces error when evaluated on smoothed trajectories.',
        'metric_key_or_source': 'variant::CIW-5::test::mae_drop_raw_to_ma5',
    },
    {
        'claim_id': 'CLM06',
        'claim_text': 'Feature clustering uses a data-driven selection and DTW is retained as validation.',
        'metric_key_or_source': 'cluster::feature::selected_method;cluster::feature::selected_k;cluster::dtw::selected_k',
    },
    {
        'claim_id': 'CLM07',
        'claim_text': 'Feature and DTW clustering agreement is limited and should be interpreted cautiously.',
        'metric_key_or_source': 'cluster::agreement::ari;cluster::agreement::nmi',
    },
    {
        'claim_id': 'CLM08',
        'claim_text': 'Teacher-side variant protocols exhibit measurable disagreement.',
        'metric_key_or_source': 'path:' + str(required_sources['variant_pairwise']),
    },
    {
        'claim_id': 'CLM09',
        'claim_text': 'Per-book CIW-5 behavior is explicitly documented for all novels.',
        'metric_key_or_source': 'path:' + str(FINAL_TAB / 'ciw5_per_book_deepdive.csv'),
    },
    {
        'claim_id': 'CLM10',
        'claim_text': 'Final report figures and tables are reproducibly generated by stage-10 notebook.',
        'metric_key_or_source': 'path:' + str(FINAL_TAB / 'variant_selection_diagnostics.csv') + ';path:' + str(FINAL_TAB / 'key_results_registry.csv'),
    },
]

# Set of all registered metric keys, for fast membership tests.
registry_keys = set(key_registry_df['metric_key'].astype(str).tolist())


def claim_status(refs: str) -> str:
    """Report whether every evidence reference of a claim can be resolved.

    Args:
        refs: ``;``-separated references. An entry starting with ``path:``
            must name an existing file system path; any other entry must be a
            key present in the module-level ``registry_keys`` set.

    Returns:
        ``'mapped'`` when at least one reference is given and all of them
        resolve, otherwise ``'missing'``.
    """
    parts = [p.strip() for p in str(refs).split(';') if p.strip()]
    # A claim without any reference has no evidence; it must not pass vacuously.
    if not parts:
        return 'missing'
    for part in parts:
        if part.startswith('path:'):
            target = part[len('path:'):].strip()
            # An empty target would otherwise resolve to the current directory.
            if not target or not Path(target).exists():
                return 'missing'
        elif part not in registry_keys:
            return 'missing'
    return 'mapped'


# Resolve every claim's references and persist the checklist.
method_claims_df = pd.DataFrame(claim_rows)
method_claims_df['status'] = method_claims_df['metric_key_or_source'].map(claim_status)
method_claims_df.to_csv(FINAL_TAB / 'method_claims_checklist.csv', index=False)

# Early checks used again in final integrity table
# These two checks only record pass/fail; they do not stop execution.
source_checks.append({
    'check': 'selected_variant_is_ciw5',
    'expected': 'CIW-5',
    'actual': selected_variant_code,
    'pass': selected_variant_code == 'CIW-5',
})
source_checks.append({
    'check': 'split_is_16_train_4_test',
    'expected': 'train=16,test=4',
    'actual': f"train={(dataset_profile_df['split'] == 'train').sum()},test={(dataset_profile_df['split'] == 'test').sum()}",
    'pass': ((dataset_profile_df['split'] == 'train').sum() == 16) and ((dataset_profile_df['split'] == 'test').sum() == 4),
})

print('Saved:', FINAL_TAB / 'dataset_profile_for_report.csv')
print('Saved:', FINAL_TAB / 'variant_selection_summary.csv')
print('Saved:', FINAL_TAB / 'variant_selection_diagnostics.csv')
print('Saved:', FINAL_TAB / 'ciw5_per_book_deepdive.csv')
print('Saved:', FINAL_TAB / 'cluster_summary_for_report.csv')
print('Saved:', FINAL_TAB / 'key_results_registry.csv')
print('Saved:', FINAL_TAB / 'method_claims_checklist.csv')
print('Selected variant:', selected_variant_code)



In [None]:
# Build curated figures for final report
# Fixed display order and colour per variant code, shared by the figures below.
variant_order = ['NC-1', 'SW-5', 'CIW-5']
variant_colors = {'NC-1': '#4c78a8', 'SW-5': '#f58518', 'CIW-5': '#54a24b'}

# Load generated support tables
# Re-read from disk (replacing the in-memory frames of the same name), so the
# figures are drawn from exactly what was written to the tables folder.
variant_selection_summary = pd.read_csv(FINAL_TAB / 'variant_selection_summary.csv')
variant_selection_diagnostics = pd.read_csv(FINAL_TAB / 'variant_selection_diagnostics.csv')
ciw5_deepdive_df = pd.read_csv(FINAL_TAB / 'ciw5_per_book_deepdive.csv')

# Figure 01: pipeline overview diagram
# Purely schematic: a row of labelled rounded boxes joined by arrows.
fig, ax = plt.subplots(figsize=(16, 4.8))
ax.axis('off')

# (box title, box description) in left-to-right order.
steps = [
    ('Get Data', 'Gutenberg corpus\n20 novels'),
    ('Sliding-Window Embeddings', 'Chunking and\nsentence-transformer vectors'),
    ('Teacher Labels', 'NC-1, SW-5, CIW-5\npseudo-ground truth'),
    ('Linear Semantic Basis', '1-layer perceptron\ny_hat = x^T w + b'),
    ('Variant Selection', 'Trend-fidelity first\nCIW-5 selected'),
    ('Clustering', 'Feature-primary\ntrajectory archetypes'),
    ('Applications', 'Interpretable semantic\ntime-series analytics'),
]

# Box centres are spread evenly across the axes; w / h are the box size in the
# same axes units.
x_positions = np.linspace(0.05, 0.95, len(steps))
y = 0.5
w = 0.12
h = 0.32
for i, (title, desc) in enumerate(steps):
    x = x_positions[i] - w / 2  # left edge of the box
    box = FancyBboxPatch((x, y - h / 2), w, h, boxstyle='round,pad=0.02', ec='black', fc='#f8f9fb', lw=1.2)
    ax.add_patch(box)
    ax.text(x_positions[i], y + 0.06, title, ha='center', va='center', fontsize=10, fontweight='bold')
    ax.text(x_positions[i], y - 0.07, desc, ha='center', va='center', fontsize=9)
    # Arrow from this box's right edge to the next box's left edge.
    if i < len(steps) - 1:
        ax.annotate('', xy=(x_positions[i + 1] - w / 2 + 0.01, y), xytext=(x_positions[i] + w / 2 - 0.01, y), arrowprops=dict(arrowstyle='->', lw=1.2))

ax.set_title('Teacher-Guided Semantic Basis Projection Workflow', fontsize=14, pad=14)
fig.tight_layout()
fig.savefig(FINAL_FIG / 'fig01_pipeline_overview.png', dpi=220, bbox_inches='tight')
plt.close(fig)

# Figure 02: variant comparison metrics on test split
# Reorder the test rows to match variant_order; .loc raises KeyError if one of
# the three variant codes is absent.
test_metrics = variant_selection_summary[variant_selection_summary['split'] == 'test'].copy()
test_metrics = test_metrics.set_index('variant_code').loc[variant_order].reset_index()
metric_cols = ['rmse', 'mae', 'mae_ma5', 'r2', 'corr']
metric_titles = ['RMSE', 'MAE', f'MAE MA({MA_WINDOW})', 'R2', 'Correlation']

# One bar panel per metric.
fig, axes = plt.subplots(1, 5, figsize=(22, 4.2))
for ax, metric, title in zip(axes, metric_cols, metric_titles):
    vals = test_metrics[metric].to_numpy(dtype=float)
    bars = ax.bar(np.arange(len(variant_order)), vals, color=[variant_colors[v] for v in variant_order], alpha=0.9)

    # Outline the best bar: lowest for error metrics, highest for r2 / corr.
    best_idx = int(np.argmin(vals)) if metric in ['rmse', 'mae', 'mae_ma5'] else int(np.argmax(vals))
    bars[best_idx].set_edgecolor('black')
    bars[best_idx].set_linewidth(2)

    # Print the value at the end of each bar.
    for i, v in enumerate(vals):
        ax.text(i, v, f'{v:.3f}', ha='center', va='bottom', fontsize=8)

    ax.set_xticks(np.arange(len(variant_order)))
    ax.set_xticklabels(variant_order)
    ax.set_title(title)
    ax.grid(axis='y', alpha=0.2)

fig.suptitle('Test Split Variant Comparison (trend-fidelity-first ranking)', fontsize=13)
fig.tight_layout()
fig.savefig(FINAL_FIG / 'fig02_variant_comparison_test_metrics.png', dpi=220, bbox_inches='tight')
plt.close(fig)

# Figure 03: CIW-5 model behavior composite
# Composite of three existing PNGs shown side by side; the images are read
# from the upstream figure folder and are not re-plotted here.
img_paths_03 = [
    SRC_VARIANT_FIG / 'indep_prediction_scatter_train_test.png',
    SRC_VARIANT_FIG / 'indep_residual_hist_train_test.png',
    SRC_VARIANT_FIG / 'train_loss_curves_by_variant.png',
]
img_titles_03 = [
    'True vs Predicted (CIW-5)',
    'Residual Distribution (CIW-5)',
    'Loss Curves (all variants, CIW-5 included)',
]
fig, axes = plt.subplots(1, 3, figsize=(18, 5.2))
for ax, p, t in zip(axes, img_paths_03, img_titles_03):
    ax.imshow(mpimg.imread(p))
    ax.axis('off')
    ax.set_title(t, fontsize=10)
fig.suptitle('CIW-5 Linear Projection Behavior', fontsize=13)
fig.tight_layout()
fig.savefig(FINAL_FIG / 'fig03_ciw5_model_behavior.png', dpi=220, bbox_inches='tight')
plt.close(fig)

# Figure 04: CIW-5 test overlays montage
# Collect the per-novel overlay images, ordered by the integer that ends the
# file stem (int() raises ValueError if a matching file has a non-numeric suffix).
overlay_paths = sorted(SRC_VARIANT_FIG.glob('indep_novel_overlay_test_*.png'), key=lambda p: int(p.stem.split('_')[-1]))
# Two columns and at least two rows (2x2 for up to four overlays, same figure
# size as a fixed 2x2 grid). Rows are added when more overlays exist, so no
# image is silently left out of the montage.
n_overlay_cols = 2
n_overlay_rows = max(2, (len(overlay_paths) + n_overlay_cols - 1) // n_overlay_cols)
fig, axes = plt.subplots(n_overlay_rows, n_overlay_cols, figsize=(15, 4.5 * n_overlay_rows))
for ax, p in zip(axes.flatten(), overlay_paths):
    ax.imshow(mpimg.imread(p))
    ax.axis('off')
    ax.set_title(f'Test novel overlay: {p.stem.split("_")[-1]}', fontsize=10)
# Hide the panels that received no image.
for ax in axes.flatten()[len(overlay_paths):]:
    ax.axis('off')
fig.suptitle('CIW-5 Test Novel Overlays', fontsize=13)
fig.tight_layout()
fig.savefig(FINAL_FIG / 'fig04_ciw5_test_overlays_reference.png', dpi=220, bbox_inches='tight')
plt.close(fig)

# Figure 05: feature cluster map
# Re-frames an existing PNG from the clustering stage under a report title.
fig, ax = plt.subplots(figsize=(11, 8))
ax.imshow(mpimg.imread(SRC_CLUSTER_FIG / 'feature_pca_scatter_feature_clusters.png'))
ax.axis('off')
ax.set_title('Feature Cluster Map (CIW-5 derived features)', fontsize=13)
fig.tight_layout()
fig.savefig(FINAL_FIG / 'fig05_feature_cluster_map.png', dpi=220, bbox_inches='tight')
plt.close(fig)

# Figure 06: cluster genre composition composite
# Two existing PNGs side by side: genre counts (left), proportions (right).
fig, axes = plt.subplots(1, 2, figsize=(16, 5.5))
axes[0].imshow(mpimg.imread(SRC_CLUSTER_FIG / 'genre_by_feature_cluster_counts.png'))
axes[0].axis('off')
axes[0].set_title('Genre counts by cluster', fontsize=11)
axes[1].imshow(mpimg.imread(SRC_CLUSTER_FIG / 'genre_by_feature_cluster_proportions.png'))
axes[1].axis('off')
axes[1].set_title('Genre proportions by cluster', fontsize=11)
fig.suptitle('Genre Composition of Feature Clusters', fontsize=13)
fig.tight_layout()
fig.savefig(FINAL_FIG / 'fig06_cluster_genre_composition.png', dpi=220, bbox_inches='tight')
plt.close(fig)

# Figure 07: feature-cluster signatures and member trajectories composite
# NOTE(review): the output file name ends in '_and_agreement' although the
# panels show signatures and member trajectories; the name is kept unchanged
# here because other documents may reference it.
fig, axes = plt.subplots(1, 2, figsize=(16, 5.5))
axes[0].imshow(mpimg.imread(SRC_CLUSTER_FIG / 'cluster_feature_signature_heatmap_top12.png'))
axes[0].axis('off')
axes[0].set_title('Cluster feature signatures', fontsize=11)
axes[1].imshow(mpimg.imread(SRC_CLUSTER_FIG / 'feature_cluster_member_trajectories_ma5.png'))
axes[1].axis('off')
axes[1].set_title('Feature-cluster member trajectories (MA5)', fontsize=11)
fig.suptitle('Feature-Cluster Signatures and MA5 Trajectory Archetypes', fontsize=13)
fig.tight_layout()
fig.savefig(FINAL_FIG / 'fig07_cluster_signatures_and_agreement.png', dpi=220, bbox_inches='tight')
plt.close(fig)

# Figure 08: variant rank sensitivity across criteria
# Heatmap with one row per variant and one column per ranking criterion; the
# cell text is the rank itself.
diag_test = variant_selection_diagnostics[variant_selection_diagnostics['split'] == 'test'].copy()
diag_test = diag_test.set_index('variant_code').loc[variant_order].reset_index()
rank_cols = ['rank_trend_primary', 'rank_raw_error', 'rank_corr']
rank_titles = ['Trend-first rank', 'Raw-error rank', 'Correlation-first rank']
rank_mat = diag_test[rank_cols].to_numpy(dtype=float)

fig, ax = plt.subplots(figsize=(8.5, 4.8))
# Colour scale spans rank 1 .. number of variants.
im = ax.imshow(rank_mat, cmap='YlGn_r', vmin=1, vmax=len(variant_order), aspect='auto')
for i in range(rank_mat.shape[0]):
    for j in range(rank_mat.shape[1]):
        ax.text(j, i, f'{int(rank_mat[i, j])}', ha='center', va='center', fontsize=11, fontweight='bold')
ax.set_xticks(np.arange(len(rank_cols)))
ax.set_xticklabels(rank_titles)
ax.set_yticks(np.arange(len(variant_order)))
ax.set_yticklabels(variant_order)
ax.set_title('Variant Rank Sensitivity on Test Split')
cbar = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
cbar.set_label('Rank (1 = best)', rotation=90)
fig.tight_layout()
fig.savefig(FINAL_FIG / 'fig08_variant_rank_sensitivity.png', dpi=220, bbox_inches='tight')
plt.close(fig)

# Figure 09: CIW-5 per-book test breakdown
# Test books only, ordered from lowest to highest moving-average MAE.
ciw5_test = ciw5_deepdive_df[ciw5_deepdive_df['split'] == 'test'].copy().sort_values('mae_ma')

def short_title(t: str, n: int = 18) -> str:
    """Return ``t`` unchanged if it has at most ``n`` characters.

    Longer titles are cut to their first ``n - 3`` characters and suffixed
    with ``'...'``.
    """
    if len(t) > n:
        return f'{t[: n - 3]}...'
    return t

# Grouped bars (raw MAE vs moving-average MAE) per test book, with the raw
# correlation overlaid on a secondary y-axis.
x = np.arange(len(ciw5_test))
# Tick label: book id on the first line, shortened title on the second.
labels = [f"{int(r.book_id)}\n{short_title(str(r.title), 20)}" for r in ciw5_test.itertuples()]
width = 0.35

fig, ax1 = plt.subplots(figsize=(12, 5.2))
ax1.bar(x - width / 2, ciw5_test['mae'].to_numpy(float), width=width, color='#4c78a8', label='Raw MAE')
ax1.bar(x + width / 2, ciw5_test['mae_ma'].to_numpy(float), width=width, color='#54a24b', label=f'MA({MA_WINDOW}) MAE')
ax1.set_xticks(x)
ax1.set_xticklabels(labels)
ax1.set_ylabel('MAE')
# Take the window size from MA_WINDOW so the title cannot drift from the legend.
ax1.set_title(f'CIW-5 Test-Novel Error Breakdown (Raw vs MA({MA_WINDOW}))')
ax1.grid(axis='y', alpha=0.2)

ax2 = ax1.twinx()
ax2.plot(x, ciw5_test['corr'].to_numpy(float), color='#f58518', marker='o', lw=2, label='Raw corr')
ax2.set_ylabel('Correlation')

# Merge the legend entries of both axes into a single legend.
h1, l1 = ax1.get_legend_handles_labels()
h2, l2 = ax2.get_legend_handles_labels()
ax1.legend(h1 + h2, l1 + l2, loc='upper right')
fig.tight_layout()
fig.savefig(FINAL_FIG / 'fig09_ciw5_per_book_test_breakdown.png', dpi=220, bbox_inches='tight')
plt.close(fig)

# Figure 10: contribution and use-cases map
# Schematic 3x3 grid of text boxes; each row is linked left -> middle -> right
# by arrows.
fig, ax = plt.subplots(figsize=(14, 7.5))
ax.axis('off')

# Left edge of each column, box size, and bottom edge of each row (axes units).
left_x, mid_x, right_x = 0.07, 0.39, 0.71
box_w, box_h = 0.22, 0.18
rows_y = [0.78, 0.52, 0.26]

# (heading, body) per row for the left, middle and right column.
left_text = [
    ('Representation', 'Sliding-window embeddings\nfrom narrative chunks'),
    ('Teacher signals', 'LLM pseudo-ground truth\nunder three protocols'),
    ('Student extraction', 'Linear semantic basis\nprojection'),
]
mid_text = [
    ('Core contribution', 'Semantic-to-time-series\nprojection framework'),
    ('Validation logic', 'Trend-first variant selection\nplus cluster structure'),
    ('Interpretability', 'Explicit basis vector,\nbook-level archetypes'),
]
right_text = [
    ('Applied analytics', 'Pacing diagnostics\nand cross-book comparison'),
    ('Workflow reuse', 'New abstract semantics\nwith the same pipeline'),
    ('Research extension', 'Teacher-student signal\ndistillation studies'),
]

for y, (a1, a2), (b1, b2), (c1, c2) in zip(rows_y, left_text, mid_text, right_text):
    # Draw the three boxes of this row, each column with its own fill colour.
    for x0, t1, t2, fc in [
        (left_x, a1, a2, '#edf2fb'),
        (mid_x, b1, b2, '#e9f7ef'),
        (right_x, c1, c2, '#fff4e6'),
    ]:
        rect = FancyBboxPatch((x0, y), box_w, box_h, boxstyle='round,pad=0.015', ec='black', fc=fc, lw=1.1)
        ax.add_patch(rect)
        ax.text(x0 + box_w / 2, y + 0.115, t1, ha='center', va='center', fontsize=10, fontweight='bold')
        ax.text(x0 + box_w / 2, y + 0.055, t2, ha='center', va='center', fontsize=9)

    # Arrows at mid-height: left -> middle, then middle -> right.
    ax.annotate('', xy=(mid_x - 0.01, y + box_h / 2), xytext=(left_x + box_w + 0.01, y + box_h / 2), arrowprops=dict(arrowstyle='->', lw=1.2))
    ax.annotate('', xy=(right_x - 0.01, y + box_h / 2), xytext=(mid_x + box_w + 0.01, y + box_h / 2), arrowprops=dict(arrowstyle='->', lw=1.2))

ax.set_title('Method Contribution and Use-Case Map', fontsize=14, pad=14)
fig.tight_layout()
fig.savefig(FINAL_FIG / 'fig10_contribution_and_use_cases_map.png', dpi=220, bbox_inches='tight')
plt.close(fig)

# Figure 11: dedicated feature-cluster member trajectories (MA5)
# Same source image as the right panel of Figure 07, shown alone at a larger size.
fig, ax = plt.subplots(figsize=(14, 9))
ax.imshow(mpimg.imread(SRC_CLUSTER_FIG / 'feature_cluster_member_trajectories_ma5.png'))
ax.axis('off')
ax.set_title('Feature Cluster Member Trajectories (MA5)', fontsize=13)
fig.tight_layout()
fig.savefig(FINAL_FIG / 'fig11_feature_cluster_member_trajectories_ma5.png', dpi=220, bbox_inches='tight')
plt.close(fig)

print('Generated curated figures in', FINAL_FIG)



In [None]:
# Render FINAL_REPORT.md and OTHER_EXPERIMENTS.md
import re

# Utility to fetch registry values.
# The key-results registry is read once here; `metric()` looks values up by
# matching the `metric_key` column and returning the `value` column.
reg = pd.read_csv(FINAL_TAB / 'key_results_registry.csv')


def metric(key: str, digits: int = 3) -> str:
    """Return the registry value stored under ``key`` as display text.

    The first row of the module-level ``reg`` table whose ``metric_key``
    equals ``key`` is used. Values that convert to ``float`` are rendered
    with ``digits`` decimal places; every other value is returned via ``str``.

    Args:
        key: Registry key to look up in the ``metric_key`` column.
        digits: Number of decimal places for numeric values.

    Returns:
        The formatted value as a string.

    Raises:
        KeyError: If no row in ``reg`` has ``metric_key == key``.
    """
    matches = reg.loc[reg['metric_key'] == key, 'value']
    if matches.empty:
        # Fail with the offending key instead of an opaque positional IndexError.
        raise KeyError(f'metric key not found in key_results_registry: {key!r}')
    v = matches.iloc[0]
    try:
        # Only the numeric conversion is guarded; non-numeric entries such as
        # variant codes fall through to their plain string form.
        vf = float(v)
    except (TypeError, ValueError, OverflowError):
        return str(v)
    return f'{vf:.{digits}f}'


# Load the report tables stored under FINAL_TAB.
variant_table = pd.read_csv(FINAL_TAB.joinpath('variant_selection_summary.csv'))
variant_diag = pd.read_csv(FINAL_TAB.joinpath('variant_selection_diagnostics.csv'))
# Keep only test-split rows, ordered by the trend-primary rank column.
variant_test = variant_diag.loc[variant_diag['split'].eq('test')].sort_values(by='rank_trend_primary')
cluster_summary = pd.read_csv(FINAL_TAB.joinpath('cluster_summary_for_report.csv'))
dataset_profile = pd.read_csv(FINAL_TAB.joinpath('dataset_profile_for_report.csv'))
ciw5_deepdive = pd.read_csv(FINAL_TAB.joinpath('ciw5_per_book_deepdive.csv'))
claims_df = pd.read_csv(FINAL_TAB.joinpath('method_claims_checklist.csv'))


def df_to_md(df: pd.DataFrame, cols: list[str], float_digits: int = 3) -> str:
    """Render selected columns of ``df`` as a pipe-delimited Markdown table.

    Args:
        df: Frame to render; only the columns named in ``cols`` are used.
        cols: Column names to emit, in output order. They are also the header.
        float_digits: Decimal places applied to ``float`` cell values.

    Returns:
        Header row, separator row, and one row per record, joined by newlines
        with no trailing newline. Non-float cells are rendered with ``str``
        and any ``|`` inside them is backslash-escaped so it cannot split the
        cell.
    """

    def _cell(v) -> str:
        # Format one cell: fixed decimals for floats, escaped text otherwise.
        if isinstance(v, float):
            return f'{v:.{float_digits}f}'
        return str(v).replace('|', '\\|')

    lines = [
        '| ' + ' | '.join(cols) + ' |',
        '|' + '|'.join(['---'] * len(cols)) + '|',
    ]
    # itertuples keeps each column's own dtype; iterrows would upcast an
    # all-numeric row to float and print integer columns as e.g. "3.000".
    for record in df[cols].itertuples(index=False, name=None):
        lines.append('| ' + ' | '.join(_cell(v) for v in record) + ' |')
    return '\n'.join(lines)


# Per-split corpus profile: distinct books plus chunk-count (`T`) statistics.
split_profile = dataset_profile.groupby('split', as_index=False).agg(
    n_books=('book_id', 'nunique'),
    n_chunks=('T', 'sum'),
    median_T=('T', 'median'),
    min_T=('T', 'min'),
    max_T=('T', 'max'),
)

# Markdown renderings of the tables embedded in the report.
variant_md_table = df_to_md(
    df=variant_test,
    cols=[
        'variant_code',
        'rmse',
        'mae',
        'mae_ma5',
        'r2',
        'corr',
        'rank_trend_primary',
        'rank_raw_error',
        'rank_corr',
    ],
)

cluster_md_table = df_to_md(
    df=cluster_summary,
    cols=[
        'cluster',
        'n_books',
        'top_feature_1',
        'top_feature_2',
        'top_feature_3',
        'representative_book',
        'dominant_genre',
        'dominant_genre_prop',
    ],
)

split_md_table = df_to_md(
    df=split_profile,
    cols=['split', 'n_books', 'n_chunks', 'median_T', 'min_T', 'max_T'],
)

# Test-split books only, ordered by ascending `mae_ma`.
ciw5_test_md = df_to_md(
    df=ciw5_deepdive.loc[ciw5_deepdive['split'].eq('test')].sort_values(by='mae_ma'),
    cols=['book_id', 'title', 'T', 'mae', 'mae_ma', 'corr', 'error_gap_raw_vs_ma'],
)

# The report is accumulated as a list of Markdown lines and joined when written.
# This opens it with the title, the executive abstract, and Section 1.
final_report_lines = [
    '# Teacher-Guided Semantic Basis Projection: A General Semantic-to-Time-Series Framework (Excitement Case Study)',
    '',
    '## Executive Abstract',
    'This report presents a full project narrative for a method that maps abstract semantic constructs into interpretable time series. The concrete case study is narrative excitement, but the method is intentionally formulated as a general framework. The pipeline starts from long-form text, constructs sliding-window embedding trajectories, produces LLM teacher labels under three protocols, learns a linear semantic basis projection with a one-layer perceptron, selects the best teacher protocol using trend-fidelity criteria, and then analyzes structure through unsupervised clustering.',
    '',
    'The central hypothesis is that sentence-embedding geometry contains latent semantic directions for abstract concepts such as excitement. Instead of maximizing variance as PCA does, this work optimizes semantic alignment to teacher labels. Under the locked selection rule, CIW-5 is selected as the primary teacher protocol. The results support a practical workflow for teacher-guided, interpretable semantic signal extraction from embeddings.',
    '',
    # Section 1
    '## 1. Project Objective and Contribution Statement',
    'The project objective is to establish and evaluate a reproducible method for converting abstract semantics into chunk-level time-series signals that are interpretable, measurable, and suitable for downstream analysis. This differs from conventional document-level sentiment analysis because the focus is trajectory behavior across narrative progression.',
    '',
    'Main contribution of this stage:',
    '1. A teacher-guided semantic basis projection formulation that is simple and explicit.',
    '2. A variant-comparison protocol for teacher labels that prioritizes trend fidelity.',
    '3. A clustering analysis layer over the selected signal to derive pacing archetypes and genre-linked structure.',
    '4. A packaging workflow where every claim is mapped to reproducible artifacts.',
    '',
]

# Section 2: model equation, training loss, and the working hypothesis.
final_report_lines.extend([
    '## 2. Problem Formulation and Hypothesis',
    'Let `x_t ∈ R^D` be the sentence embedding of chunk `t` for a book. The student model predicts semantic intensity with a single linear map:',
    '',
    '`ŷ_t = x_t^T w + b`',
    '',
    'where `w ∈ R^D` is the semantic basis vector and `b` is a scalar bias. Training minimizes mean squared error with L2 regularization:',
    '',
    '`L = (1/N) Σ_t (ŷ_t - y_t)^2 + λ ||w||_2^2`',
    '',
    'Hypothesis: if excitement is encoded in embedding geometry, a supervised linear axis should recover trend-aligned signals against teacher labels. This objective is distinct from PCA, which optimizes variance explanation without semantic supervision.',
    '',
])

# Section 3: corpus counts are pulled from the registry (as whole numbers)
# in the order the sentence template consumes them.
corpus_metric_keys = (
    'corpus::n_books',
    'corpus::n_chunks_total',
    'corpus::min_T',
    'corpus::max_T',
    'corpus::train_novels',
    'corpus::test_novels',
)
corpus_sentence = 'The corpus contains `{}` novels and `{}` total chunks across all books. Chunk counts range from `{}` to `{}` per book. Novel-level split is deterministic and leakage-safe, with `{}` train novels and `{}` test novels.'.format(
    *[metric(registry_key, 0) for registry_key in corpus_metric_keys]
)
final_report_lines.extend([
    '## 3. Data and Representation Pipeline',
    corpus_sentence,
    '',
    'Split profile:',
    '',
    split_md_table,
    '',
    'Representation route used in this report:',
    '`Data -> Sliding-window embeddings -> Teacher pseudo-ground truth variants -> Linear semantic basis projection -> Variant selection -> Clustering -> Utility analysis`',
    '',
])

# Section 4: the three teacher-label variants.
final_report_lines.extend([
    '## 4. Teacher Protocols and Pseudo-Ground-Truth Design',
    'Teacher labels are generated by LLM judging on a 0-4 excitement scale. Labels are treated as pseudo-ground truth because they are model-derived supervision, not direct human annotation. Three variants are used:',
    '1. `NC-1`: No-Context Chunk Teacher Labels (`label.npy`).',
    '2. `SW-5`: Shared-Window Labels (`label_winsize_5.npy`).',
    '3. `CIW-5`: Context-Window Independent Labels (`label_indep_winsize_5.npy`).',
    '',
    'The variant study is essential because teacher protocol changes alter supervision smoothness, local consistency, and calibration behavior.',
    '',
])

# Section 5: student model description; hyperparameters are read from the registry.
final_report_lines.extend([
    '## 5. Student Model: Semantic Basis Extraction',
    'The student is a one-layer perceptron over standardized embeddings. The learned vector `w` is interpreted as a semantic basis direction in embedding space rather than a black-box latent representation. This gives a clear mapping between representation and predicted signal while preserving computational simplicity.',
    '',
    'Training configuration for CIW-5 model (from saved model artifact):',
    '1. `seed = {}`'.format(metric('ciw5::train::seed', 0)),
    '2. `lr = {}`'.format(metric('ciw5::train::lr', 5)),
    '3. `epochs = {}`'.format(metric('ciw5::train::epochs', 0)),
    '4. `batch_size = {}`'.format(metric('ciw5::train::batch_size', 0)),
    '5. `weight_decay = {}`'.format(metric('ciw5::train::weight_decay', 6)),
    '',
])

# Section 6: the ranking rule used to choose a teacher variant.
final_report_lines.extend([
    '## 6. Evaluation Protocol and Trend-Fidelity Criterion',
    'Primary selection policy is trend-fidelity-first on test data with MA(5) smoothing. For each variant, ranking is determined by:',
    '1. Lowest `MAE_MA5`',
    '2. Then lowest raw `MAE`',
    '3. Then lowest `RMSE`',
    '',
    'Secondary diagnostics (`R2`, correlation) are reported for context but do not override the primary criterion.',
    '',
])

# Section 7: test-split diagnostics table followed by registry-backed readings.
ciw5_raw_line = '2. `CIW-5` test raw metrics are `RMSE={}`, `MAE={}`.'.format(
    metric('variant::CIW-5::test::rmse'),
    metric('variant::CIW-5::test::mae'),
)
ciw5_trend_line = '3. `CIW-5` trend metric is `MAE_MA5={}`, with raw-to-smoothed drop `{}`.'.format(
    metric('variant::CIW-5::test::mae_ma5'),
    metric('variant::CIW-5::test::mae_drop_raw_to_ma5'),
)
selected_variant_line = '4. `selected_variant_code = {}` based on deterministic ranking.'.format(
    metric('selected_variant_code', 0)
)
final_report_lines.extend([
    '## 7. Variant Study Results and Selection Rationale',
    'Test-split variant diagnostics:',
    '',
    variant_md_table,
    '',
    'Interpretation:',
    '1. `CIW-5` ranks first by the locked trend-fidelity rule.',
    ciw5_raw_line,
    ciw5_trend_line,
    selected_variant_line,
    '',
])

# Section 8: per-test-book table for the selected variant and its two takeaways.
final_report_lines.extend([
    '## 8. Selected Variant (CIW-5) Deep Behavior Analysis',
    'Per-test-book CIW-5 diagnostics are summarized below. `error_gap_raw_vs_ma` indicates the improvement from raw MAE to MA(5) MAE.',
    '',
    ciw5_test_md,
    '',
    'This table supports two conclusions:',
    '1. Trend-level agreement is consistently better than chunk-level agreement.',
    '2. Book-level heterogeneity remains substantial, so deployment should prioritize comparative trend profiling over absolute chunk score decisions.',
    '',
])

# Section 9: clustering inputs, the per-book feature list, and the cluster table.
selected_cluster_config_line = 'Selected feature configuration: method `{}`, `k={}`, silhouette `{}`, stability ARI `{}`.'.format(
    metric('cluster::feature::selected_method', 0),
    metric('cluster::feature::selected_k', 0),
    metric('cluster::feature::silhouette'),
    metric('cluster::feature::kmeans_stability_ari'),
)
final_report_lines.extend([
    '## 9. Clustering on Selected Signal and Archetype Interpretation',
    'Clustering in the final presentation is applied to CIW-5 derived trajectory features using the feature branch.',
    '',
    'Clustering input definition in this project:',
    '1. Feature-branch clustering is performed on a per-book feature vector extracted from the **raw CIW-5 trajectory**.',
    '2. This feature vector also includes three MA(5)-derived summary features (`mean_ma5`, `std_ma5`, `p95_ma5`).',
    '3. Therefore, clustering is **not** performed on CIW-5 MA(5)-only sequence values. It is performed on a mixed descriptor set dominated by raw CIW-5 statistics plus MA(5) summaries.',
    '4. MA(5) trajectories are used for additional visualization and archetype interpretation panels.',
    '',
    'Extracted per-book features from CIW-5 time series:',
    '1. Length and level/distribution: `T`, `mean_y`, `std_y`, `median_y`, `iqr_y`, `min_y`, `max_y`, `p10_y`, `p90_y`, `range_y`.',
    '2. Label composition: `prop_label_0`, `prop_label_1`, `prop_label_2`, `prop_label_3`, `prop_label_4`, `entropy_labels`.',
    '3. Local dynamics: `mean_abs_diff`, `std_diff`, `p95_abs_diff`, `jump_ge_2_rate`, `up_rate`, `down_rate`, `flat_rate`, `lag1_autocorr`, `sign_change_rate`.',
    '4. Position/trend structure: `corr_with_position`, `slope_position`, `mean_early`, `mean_mid`, `mean_late`.',
    '5. Smoothed summaries (MA5): `mean_ma5`, `std_ma5`, `p95_ma5`.',
    '',
    selected_cluster_config_line,
    '',
    'Cluster summary (feature-primary):',
    '',
    cluster_md_table,
    '',
    'Reading guidance: feature clusters are interpreted through engineered trajectory descriptors and MA(5) member trajectories. This keeps the final presentation focused on interpretable archetypes from CIW-5 features.',
    '',
])

# Section 10: one subsection per curated figure.
final_report_lines.append('## 10. Figure Explanations and Evidence')

# Each entry is (title, file name, what it shows, how to read it, insight).
fig_sections = [
    (
        'Figure 1. Pipeline Overview',
        'fig01_pipeline_overview.png',
        'The full workflow from data ingestion to semantic time-series applications.',
        'Read left to right. Each box is a stage and arrows indicate dependency flow.',
        'The report contribution is centered on the supervised semantic-axis extraction stage, not on unsupervised variance decomposition.',
    ),
    (
        'Figure 2. Variant Comparison on Test Split',
        'fig02_variant_comparison_test_metrics.png',
        'Comparative test metrics for NC-1, SW-5, and CIW-5 across raw and smoothed errors plus correlation diagnostics.',
        'For RMSE/MAE/MAE_MA5 lower is better. For R2/correlation higher is better. Selection still follows trend-first ranking.',
        'CIW-5 is selected because it is best on the primary trend metric while remaining competitive on raw metrics.',
    ),
    (
        'Figure 3. CIW-5 Model Behavior',
        'fig03_ciw5_model_behavior.png',
        'Scatter, residual, and training diagnostics associated with the selected CIW-5 student model.',
        'Use scatter for calibration spread, residual histogram for bias shape, and loss curves for optimization stability.',
        'The model is stable and interpretable, but chunk-level residual spread confirms that trend-level interpretation is the safer use mode.',
    ),
    (
        'Figure 4. CIW-5 Test Overlays',
        'fig04_ciw5_test_overlays_reference.png',
        'Overlay of teacher and student trajectories for all held-out test novels.',
        'Track directional movement and pacing regions instead of exact pointwise matching.',
        'The selected model preserves broad narrative dynamics on unseen novels, which justifies trend-level utility claims.',
    ),
    (
        'Figure 5. Feature Cluster Map',
        'fig05_feature_cluster_map.png',
        'Feature-space map of books with selected feature-cluster assignments.',
        'Each point is one book and color indicates cluster identity.',
        'The map provides the geometric context for archetype interpretation in the cluster summary table.',
    ),
    (
        'Figure 6. Genre Composition by Cluster',
        'fig06_cluster_genre_composition.png',
        'Cluster composition shown in both counts and row-normalized proportions by `genre_primary`.',
        'Left panel shows absolute counts. Right panel shows within-cluster composition.',
        'Genre concentration varies across clusters, supporting the claim that the extracted signal captures narratively meaningful structure.',
    ),
    (
        'Figure 7. Feature Cluster Signatures and Member Trajectories',
        'fig07_cluster_signatures_and_agreement.png',
        'Top feature signatures by cluster together with MA(5) member-trajectory archetypes.',
        'Use the signature panel to read which features distinguish each cluster, then inspect MA(5) trajectory panels for pacing shape patterns.',
        'Together, these views connect feature-level semantics to observable cluster trajectory behavior in CIW-5.',
    ),
    (
        'Figure 8. Variant Rank Sensitivity',
        'fig08_variant_rank_sensitivity.png',
        'Rank matrix of variants under trend-first, raw-error-first, and correlation-first criteria.',
        'Lower rank numbers are better. Compare rows (variants) across columns (criteria).',
        'This figure makes selection logic transparent and shows how conclusions shift under alternate objectives.',
    ),
    (
        'Figure 9. CIW-5 Per-Book Test Breakdown',
        'fig09_ciw5_per_book_test_breakdown.png',
        'Book-level raw MAE and MA(5) MAE bars with correlation line for the test set.',
        'Compare paired bars within each book to inspect smoothing gains and use line markers for correlation context.',
        'The figure quantifies where trend-level gains are strongest and where residual uncertainty remains.',
    ),
    (
        'Figure 10. Contribution and Use-Cases Map',
        'fig10_contribution_and_use_cases_map.png',
        'Conceptual mapping from method components to research contributions and practical uses.',
        'Read each row as input component -> methodological contribution -> usage pathway.',
        'The project is positioned as a reusable semantic-to-time-series framework, with excitement as the demonstrated task.',
    ),
    (
        'Figure 11. Feature Cluster Member Trajectories (MA5)',
        'fig11_feature_cluster_member_trajectories_ma5.png',
        'A dedicated high-resolution view of MA(5) member trajectories for each feature cluster.',
        'Each subplot represents one feature cluster with thin lines for member books and a bold centroid trajectory.',
        'This figure provides direct visual evidence of pacing archetypes that define the final feature-cluster interpretation.',
    ),
]

# Emit heading, image link (relative to the docs directory), and three bullets per figure.
for title, fig_name, what_txt, how_txt, insight_txt in fig_sections:
    final_report_lines.extend([
        f'### {title}',
        f'![{title}](../outputs/final_report/figures/{fig_name})',
        '',
        f'- What this figure shows: {what_txt}',
        f'- How to read it: {how_txt}',
        f'- Interpretation and insight: {insight_txt}',
        '',
    ])

# Section 11: where the method is useful and example deployment scenarios.
final_report_lines.append('## 11. Utility, Generalization, and Deployment Scenarios')
final_report_lines.append('The method is useful when a team needs interpretable, chunk-level semantic trajectories from high-dimensional embeddings and cannot afford heavy black-box sequence models. Since the student is linear and trained with explicit supervision, each run produces a transparent semantic basis that is fast to apply to new data.')
final_report_lines.append('')
final_report_lines.append('Potential use scenarios:')
final_report_lines.append('1. Narrative pacing analytics for editorial workflow support.')
final_report_lines.append('2. Cross-book comparative profiling for literary or media research.')
final_report_lines.append('3. Teacher-student distillation pipeline for other abstract constructs (for example suspense, urgency, or emotional intensity).')
final_report_lines.append('4. Lightweight semantic monitoring where interpretability and reproducibility are mandatory.')
final_report_lines.append('')

# Section 12: limitations.
final_report_lines.append('## 12. Limitations and Threats to Validity')
final_report_lines.append('1. Teacher labels are pseudo-ground truth and can contain systematic LLM bias.')
# The novel count is read from the same registry key that Section 3 uses, so
# the two statements cannot disagree if the corpus changes.
final_report_lines.append('2. Corpus size is small (`{}` novels), limiting external validity.'.format(metric('corpus::n_books', 0)))
final_report_lines.append('3. The student is linear, so nonlinear semantic structure may be underfit.')
final_report_lines.append('4. Cluster structure is sensitive to feature design and sample size.')
final_report_lines.append('5. Correlation and R2 behavior can diverge from trend-error objectives, so objective choice must be explicit.')
final_report_lines.append('')

# Section 13: reproducibility notes and the list of evidence tables.
final_report_lines.append('## 13. Reproducibility and Artifact Guide')
# SEED and MA_WINDOW are the notebook-level constants, so the stated
# configuration always matches what this notebook was run with.
final_report_lines.append('Generated in stage `10_final_teacher_guided_semantic_basis_report.ipynb` with deterministic configuration (`SEED={}`, `MA_WINDOW={}`).'.format(SEED, MA_WINDOW))
final_report_lines.append('')
final_report_lines.append('Core evidence artifacts:')
final_report_lines.append('1. `../outputs/final_report/tables/variant_selection_summary.csv`')
final_report_lines.append('2. `../outputs/final_report/tables/variant_selection_diagnostics.csv`')
final_report_lines.append('3. `../outputs/final_report/tables/dataset_profile_for_report.csv`')
final_report_lines.append('4. `../outputs/final_report/tables/ciw5_per_book_deepdive.csv`')
final_report_lines.append('5. `../outputs/final_report/tables/cluster_summary_for_report.csv`')
final_report_lines.append('6. `../outputs/final_report/tables/key_results_registry.csv`')
final_report_lines.append('7. `../outputs/final_report/tables/method_claims_checklist.csv`')
final_report_lines.append('8. `../outputs/final_report/tables/report_integrity_checks.csv`')
final_report_lines.append('')
final_report_lines.append('Related appendix: `docs/OTHER_EXPERIMENTS.md` documents Twist Signal and PCA tracks as secondary experiments, intentionally separated from the main claim path.')
final_report_lines.append('')

# Section 14: conclusion.
final_report_lines.append('## 14. Conclusion')
final_report_lines.append('This project demonstrates a concrete path for converting abstract semantics into interpretable time series by combining embedding trajectories, LLM teacher supervision, and linear semantic basis extraction. In this case study, CIW-5 is the most suitable teacher protocol under trend-first selection. The resulting signal supports meaningful clustering and practical downstream interpretation. More broadly, the workflow provides a reusable pattern for teacher-guided semantic projection where transparency, reproducibility, and analytical utility are first-class goals.')
final_report_lines.append('')

# Closing pointers to the tables that back the report's claims.
final_report_lines.append('## Claim Provenance')
final_report_lines.append('- Core registry: `../outputs/final_report/tables/key_results_registry.csv`')
final_report_lines.append('- Claim checklist: `../outputs/final_report/tables/method_claims_checklist.csv`')
final_report_lines.append('- Report integrity: `../outputs/final_report/tables/report_integrity_checks.csv`')

# Write the report as UTF-8 with a single trailing newline.
final_report_path = DOCS_ROOT / 'FINAL_REPORT.md'
final_report_path.write_text(chr(10).join(final_report_lines) + chr(10), encoding='utf-8')

# Keep the appendix narrative isolated: the secondary experiments get their
# own Markdown file, built as a list of lines and written in one call.
other_lines = [
    '# Other Experiments: Twist Signal and PCA Tracks',
    '',
    '## Scope',
    'This document summarizes additional experiments completed in the project that are not part of the primary final claim. The primary claim is centered on Teacher-Guided Semantic Basis Projection. The experiments below remain valuable, but they are intentionally separated to keep narrative focus clear.',
    '',
    '## Twist Signal Track',
    'Twist Signal experiments model local novelty dynamics from embedding trajectories using `s_t` and acceleration `a_t`. This branch supports exploratory narrative-change analysis and peak detection.',
    '',
    'Key outputs:',
    '- `../outputs/features.csv`',
    '- `../outputs/clusters_kmeans.csv`',
    '- `../outputs/clusters_hier.csv`',
    '- `../outputs/dtw_distance_k7.npy`',
    '- `../outputs/eda/`',
    '',
    '## PCA Component Track',
    'PCA experiments analyze unsupervised axes of variance and their temporal behavior across books. This is useful for structural diagnostics and exploratory component interpretation.',
    '',
    'Key outputs:',
    '- `../outputs/pca/global_pca_fit.npz`',
    '- `../outputs/pca/global_pca_fit_meta.json`',
    '- `../outputs/pca/global_pca_variance_summary.csv`',
    '- `../outputs/pca_analysis/`',
    '',
    '## Why This Is Secondary in the Final Narrative',
    'The final narrative aims to evaluate supervised extraction of one specific semantic basis from embeddings using teacher labels. Twist Signal and PCA branches address different questions. They are retained as supporting evidence of broad project exploration and as future integration candidates, but they are not used as primary evidence for the teacher-guided semantic basis claim.',
]

other_report_path = DOCS_ROOT / 'OTHER_EXPERIMENTS.md'
other_report_text = chr(10).join(other_lines) + chr(10)
other_report_path.write_text(other_report_text, encoding='utf-8')

# Integrity checks for generated docs and image links.
# Everything here only gathers facts; the check rows are assembled afterwards.

# Curated figures that must exist in FINAL_FIG.
required_figs = [
    'fig01_pipeline_overview.png',
    'fig02_variant_comparison_test_metrics.png',
    'fig03_ciw5_model_behavior.png',
    'fig04_ciw5_test_overlays_reference.png',
    'fig05_feature_cluster_map.png',
    'fig06_cluster_genre_composition.png',
    'fig07_cluster_signatures_and_agreement.png',
    'fig08_variant_rank_sensitivity.png',
    'fig09_ciw5_per_book_test_breakdown.png',
    'fig10_contribution_and_use_cases_map.png',
    'fig11_feature_cluster_member_trajectories_ma5.png',
]
fig_files = sorted(png.name for png in FINAL_FIG.glob('*.png'))
missing_figs = [fig_name for fig_name in required_figs if fig_name not in fig_files]

# Report tables that must exist in FINAL_TAB.
required_tables = [
    'dataset_profile_for_report.csv',
    'variant_selection_summary.csv',
    'variant_selection_diagnostics.csv',
    'ciw5_per_book_deepdive.csv',
    'cluster_summary_for_report.csv',
    'key_results_registry.csv',
    'method_claims_checklist.csv',
]
missing_tables = [
    table_name for table_name in required_tables
    if not FINAL_TAB.joinpath(table_name).exists()
]

# Re-read both documents from disk and look for em dashes in either one.
final_text = final_report_path.read_text(encoding='utf-8')
other_text = other_report_path.read_text(encoding='utf-8')
emdash_present = any('—' in doc_text for doc_text in (final_text, other_text))

# Validate embedded image links: each target is resolved relative to the
# report's own directory and must exist on disk.
image_refs = re.findall(r'!\[[^\]]*\]\(([^)]+)\)', final_text)
missing_image_links = [
    link for link in image_refs
    if not (final_report_path.parent / link).resolve().exists()
]

# Claims completeness: every checklist row must have status 'mapped'.
claims_df = pd.read_csv(FINAL_TAB / 'method_claims_checklist.csv')
claims_all_mapped = bool(claims_df['status'].eq('mapped').all())

# CIW-5 rank checks: trend-primary rank of CIW-5 on the test split.
variant_diag_df = pd.read_csv(FINAL_TAB / 'variant_selection_diagnostics.csv')
ciw5_test_ranks = variant_diag_df.loc[
    (variant_diag_df['split'] == 'test') & (variant_diag_df['variant_code'] == 'CIW-5'),
    'rank_trend_primary',
]
ciw5_rank = int(ciw5_test_ranks.iloc[0])

# Row counts per split in the dataset profile table.
split_profile = pd.read_csv(FINAL_TAB / 'dataset_profile_for_report.csv')
train_count = int(split_profile['split'].eq('train').sum())
test_count = int(split_profile['split'].eq('test').sum())

def _integrity_row(check, expected, actual, passed):
    """Build one integrity record with keys check / expected / actual / pass."""
    return {'check': check, 'expected': expected, 'actual': actual, 'pass': passed}


def _flag_row(check, ok):
    """Build a record for a boolean check whose expected value is True."""
    return _integrity_row(check, True, ok, ok)


# Checks that assert a single report table exists on disk.
table_presence_checks = (
    ('dataset_profile_for_report_exists', 'dataset_profile_for_report.csv'),
    ('variant_selection_diagnostics_exists', 'variant_selection_diagnostics.csv'),
    ('ciw5_per_book_deepdive_exists', 'ciw5_per_book_deepdive.csv'),
    ('method_claims_checklist_exists', 'method_claims_checklist.csv'),
)

# Source-artifact checks come first, followed by the report-level checks.
integrity_rows = list(source_checks)
integrity_rows.extend(
    _flag_row(check_name, (FINAL_TAB / table_file).exists())
    for check_name, table_file in table_presence_checks
)
integrity_rows.extend([
    _integrity_row(
        'required_table_count',
        len(required_tables),
        len(required_tables) - len(missing_tables),
        len(missing_tables) == 0,
    ),
    _integrity_row(
        'curated_figure_count',
        len(required_figs),
        len(required_figs) - len(missing_figs),
        len(missing_figs) == 0,
    ),
    _flag_row('final_report_exists', final_report_path.exists()),
    _flag_row('other_experiments_exists', other_report_path.exists()),
    _flag_row('embedded_image_links_exist', len(missing_image_links) == 0),
    _flag_row('claims_all_mapped', claims_all_mapped),
    _flag_row('no_emdash_in_docs', not emdash_present),
    _integrity_row('ciw5_rank_1_in_diagnostics', 1, ciw5_rank, ciw5_rank == 1),
    _integrity_row(
        'split_counts_are_16_4',
        'train=16,test=4',
        f'train={train_count},test={test_count}',
        train_count == 16 and test_count == 4,
    ),
])

integrity_df = pd.DataFrame(integrity_rows)
integrity_csv_path = FINAL_TAB / 'report_integrity_checks.csv'
integrity_df.to_csv(integrity_csv_path, index=False)

print('Saved:', final_report_path)
print('Saved:', other_report_path)
print('Saved:', integrity_csv_path)
print('Integrity all-pass:', bool(integrity_df['pass'].all()))

# List whatever is missing; nothing is printed for empty lists.
for problem_label, problem_items in (
    ('Missing required figures:', missing_figs),
    ('Missing required tables:', missing_tables),
    ('Broken image refs:', missing_image_links),
):
    if problem_items:
        print(problem_label, problem_items)

