In [1]:
# ======================================================================
#  Chain-of-Thought category probe – notebook driver
# ======================================================================
# 1.  Global configuration ------------------------------------------------
%cd ../..
%pwd
from pathlib import Path

MODEL_PATH      = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"      # HF hub or local dir
GENERAL_DIR = Path("c_cluster_analysis/outputs/hints/mmlu/DeepSeek-R1-Distill-Llama-8B")
CATEGORY_FILE   = Path(GENERAL_DIR / "confidence" / "none_unverb_5001.json")                    # ↳ annotation JSON
COT_FILE   = Path(GENERAL_DIR / "orig" / "none_5001.json")                    # ↳ annotation JSON
MAIN_CATEGORIES = ["backtracking", "logical_deduction"]           # target label(s)
LAYERS          = list(range(1, 33, 5))                           # every 5-th layer
MAX_SAMPLES     = 1                                            # or e.g. 200
WHITELIST       = None                                            # path to JSON list of q-ids
CAPTURE_FILE   = Path(GENERAL_DIR / "layprobe" / "none_unverb_5001.json")                    # ↳ annotation JSON
CAPTURE_FILE    = Path("outputs/hidden_capture.json")             # raw vectors
ATTRVEC_DIR   = Path(GENERAL_DIR / "attr_vecs" / "none_unverb_5001.json")                    # ↳ annotation JSON

# 2.  Imports & helpers ---------------------------------------------------
# NOTE(review): `json` is not referenced in the cells visible here — confirm
# whether it is still needed before removing it.
import json, logging
# Emit INFO-level log records in the cell output (root logger configuration).
logging.basicConfig(level=logging.INFO)

# Project-local probe utilities.  Presumably this import relies on the working
# directory being the repository root (set earlier in this cell) — TODO confirm.
from c_cluster_analysis.cat_probe_5.cot_probe_utils import (
    load_model_and_tokenizer,
    gather_category_sentences,
    run_probe_capture_for_categories,
    train_linear_probes,
    save_attribute_vectors,
)

# 3.  Model / tokenizer ---------------------------------------------------
# The loader's result is unpacked into four values; only the first two are
# kept (`model`, `tok`), the last two are discarded.
# NOTE(review): binding `_` at notebook level shadows IPython's "last output"
# variable for the rest of the session — confirm that is acceptable.
model, tok, _, _ = load_model_and_tokenizer(MODEL_PATH)

  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


/root/CoTFaithChecker


  from .autonotebook import tqdm as notebook_tqdm
INFO:root:Loading deepseek-ai/DeepSeek-R1-Distill-Llama-8B on cuda
INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.09it/s]


In [4]:
#### ONLY FOR TESTING RE-DEFINED
# Overrides the MAX_SAMPLES value from the configuration cell for this run.
MAX_SAMPLES = 5

# 4.  Build the sentence-selection map ------------------------------------
# Select the sentences to probe from the category annotation file, restricted
# to the target categories, the optional whitelist and the sample cap.
selection_map = gather_category_sentences(
    CATEGORY_FILE,
    main_categories=MAIN_CATEGORIES,
    whitelist=WHITELIST,
    max_samples=MAX_SAMPLES,
)

# Report how many sentences (summed over all entries) and how many questions
# (number of keys) were selected.
print(
    "Capturing {} sentences across {} questions".format(
        sum(map(len, selection_map.values())),
        len(selection_map),
    )
)

# 5.  Hidden-state capture -------------------------------------------------
# Run the capture over the selected sentences for the configured layers; the
# result is also written to CAPTURE_FILE via `output_file`.
captured = run_probe_capture_for_categories(
    model=model,
    tok=tok,
    cot_file=COT_FILE,
    selection_map=selection_map,
    layers=LAYERS,
    output_file=CAPTURE_FILE,
)

# 6.  Linear-probe training -----------------------------------------------
# Fit the probes on the captured vectors/labels with a fixed 80/20 split and
# seed so the reported numbers are reproducible.
probes, metrics = train_linear_probes(
    captured["vectors"],
    captured["labels"],
    test_size      = 0.2,
    random_state   = 42,
)


def _layer_sort_key(layer_name):
    """Sort key that orders names such as ``layer_6`` by their numeric suffix.

    Plain string sorting puts ``layer_11`` ... ``layer_31`` before ``layer_6``.
    Names without a numeric suffix sort after the numbered ones, alphabetically.
    """
    text = str(layer_name)
    suffix = text.rpartition("_")[2]
    if suffix.isdigit():
        return (0, int(suffix), text)
    return (1, 0, text)


print("\n=== Probe results (weighted F1) ===")
# Iterate in layer order (1, 6, 11, ...) rather than lexicographic order.
for ln in sorted(metrics, key=_layer_sort_key):
    print(f"{ln:>8}:  acc {metrics[ln]['accuracy']:.3f}   "
          f"f1 {metrics[ln]['f1']:.3f}")

# 7.  (optional) save attribute vectors ------------------------------------
save_attribute_vectors(captured["attr_vecs"], ATTRVEC_DIR)
print(f"\nFinished – vectors in {CAPTURE_FILE}")
# The attribute vectors go to a different target than the raw capture file,
# so report it as well.
print(f"Attribute vectors target: {ATTRVEC_DIR}")


INFO:root:Kept the first 5 questions (max_samples)


Capturing 78 sentences across 5 questions


INFO:root:Saved capture to outputs/hidden_capture.json



=== Probe results (weighted F1) ===
 layer_1:  acc 0.875   f1 0.817
layer_11:  acc 1.000   f1 1.000
layer_16:  acc 1.000   f1 1.000
layer_21:  acc 1.000   f1 1.000
layer_26:  acc 1.000   f1 1.000
layer_31:  acc 1.000   f1 1.000
 layer_6:  acc 1.000   f1 1.000

Finished – vectors in outputs/hidden_capture.json
