In [1]:
import torch, platform, sys

# Report the interpreter and PyTorch versions, then whether a CUDA device is
# visible (and its name when one is).
print("Python:", sys.version)
print("PyTorch:", torch.__version__)

has_cuda = torch.cuda.is_available()
print("CUDA available:", has_cuda)
if has_cuda:
    print("GPU:", torch.cuda.get_device_name(0))


Python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
PyTorch: 2.9.0+cu126
CUDA available: True
GPU: NVIDIA A100-SXM4-80GB


In [2]:
!pip install -U "transformers>=4.37.0" accelerate einops tiktoken \
    umap-learn scikit-learn matplotlib

# If you want a fresh torch, uncomment (Colab usually has a good version already)
# !pip install -U torch --index-url https://download.pytorch.org/whl/cu121




In [3]:
import os

# Clone (skip if you already have it)
if not os.path.exists("protoInterpretation"):
    !git clone https://github.com/Nonempirical/protoInterpretation protoInterpretation

%cd protoInterpretation

# Editable install
!pip install -e .


Cloning into 'protoInterpretation'...
remote: Enumerating objects: 122, done.
remote: Counting objects: 100% (122/122), done.
remote: Compressing objects: 100% (71/71), done.
remote: Total 122 (delta 59), reused 98 (delta 35), pack-reused 0 (from 0)
Receiving objects: 100% (122/122), 53.04 KiB | 17.68 MiB/s, done.
Resolving deltas: 100% (59/59), done.
/content/protoInterpretation
Obtaining file:///content/protoInterpretation
  Preparing metadata (setup.py) ... done
Installing collected packages: protoInterpretation
  Running setup.py develop for protoInterpretation
Successfully installed protoInterpretation-0.1.0


In [4]:
from src.protoInterpretation import (
    ModelWrapper,
    SamplingConfig,
    sample_chain,
    compute_horizon_metrics,
    project_step_embeddings,
    plot_entropy_curve,
    plot_horizon_width,
    plot_step_scatter_2d,
    save_batch_npz,
    save_metrics_json,
)
from src.protoInterpretation.model import HFModelAdapter, HFModelConfig

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import matplotlib.pyplot as plt
import numpy as np

In [5]:
from huggingface_hub import login
from google.colab import userdata

# Read the Hugging Face access token from the Colab secret named "hfKey"
# (change the name here if your secret is stored under a different key),
# then authenticate this session with the Hugging Face Hub.
HF_TOKEN = userdata.get("hfKey")  # your Colab secret name
login(token=HF_TOKEN)


In [6]:
model_name = "meta-llama/Meta-Llama-3.1-8B"

# If the tokenizer defines no pad token, fall back to the EOS token so that
# `pad_token_id` below is not None.
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

hf_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # `torch_dtype` is deprecated in the installed transformers release
    # (it emits "`torch_dtype` is deprecated! Use `dtype` instead!").
    dtype=torch.bfloat16,
    device_map="auto",
)

# REQUIRED for output_attentions=True
hf_model.set_attn_implementation("eager")

# Create the adapter without running HFModelAdapter.__init__ and populate its
# attributes by hand from the objects loaded above.
# NOTE(review): presumably done so the already-loaded model is reused instead of
# being loaded a second time by the adapter's constructor — confirm against
# HFModelAdapter, and keep this attribute list in sync with what __init__ sets.
model = HFModelAdapter.__new__(HFModelAdapter)
model.config = HFModelConfig(
    model_name_or_path=model_name,
    dtype="bfloat16"
)
model.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.tokenizer = tokenizer
model.pad_token_id = tokenizer.pad_token_id
model.model = hf_model
# The adapter shares the tokenizer object above, so this also sets left padding on `tokenizer`.
model.tokenizer.padding_side = "left"


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

In [7]:
from src.protoInterpretation import SamplingConfig  # adjust import if yours differs

# Prompt stems the author labels OPEN.
OPEN_PROMPTS = [
    "A bat is",
    "The woman in the blue dress",
    "I saw",
    "Something happens when there exists",
    "The man",
    "The man in the street",
]

# Prompt stems the author labels CLOSED.
CLOSED_PROMPTS = [
    "A pencil is",
    "Napoleon is",
    "Photosynthesis is the process where",
    "The declaration of Independence, formally",
    "Photosynthesis is",
    "Erosion is",
]

# Full list, OPEN group first, then CLOSED.
prompts = OPEN_PROMPTS + CLOSED_PROMPTS

# Sampling configuration shared by every prompt run in this notebook.
# NOTE(review): the per-field notes below are inferred from the field names only;
# confirm them against the SamplingConfig definition.
cfg = SamplingConfig(
    num_chains=256,  # presumably the number of sampled continuations per prompt
    max_steps=32,  # presumably the number of generation steps per chain
    temperature=0.9,
    top_k=0,  # presumably 0 disables top-k filtering — TODO confirm
    top_p=0.9,
    seed=42,  # fixed seed for the sampler
    store_topk_logits=50,  # presumably keeps the 50 largest logits per step — TODO confirm
    store_attention_weights=True,
)

In [8]:
import os
import re
from datetime import datetime

# Where run artifacts are written: a local folder, or Google Drive when enabled.
USE_GOOGLE_DRIVE = True  # toggle

if not USE_GOOGLE_DRIVE:
    BASE_RUN_DIR = "./runs"
else:
    # Imported here so the notebook has no google.colab dependency when Drive is off.
    from google.colab import drive

    drive.mount("/content/drive", force_remount=False)
    BASE_RUN_DIR = "/content/drive/MyDrive/protoInterpretation-runs"

from src.protoInterpretation import (
    sample_chain,
    compute_horizon_metrics,
    save_batch_npz,
    save_metrics_json,
)

def slugify_prompt(text: str, max_len: int = 60) -> str:
    """
    Turn a prompt into a filename-safe slug.

    The text is stripped and lower-cased, every run of characters outside
    [a-z0-9] becomes a single underscore, leading/trailing underscores are
    removed, and the result is cut to ``max_len`` characters (dropping any
    underscore left dangling by the cut).

    Example: "The bat is in :" -> "the_bat_is_in"

    Returns "empty_prompt" when nothing usable remains.
    """
    lowered = text.strip().lower()
    underscored = re.sub(r"[^a-z0-9]+", "_", lowered)
    collapsed = re.sub(r"_+", "_", underscored)
    trimmed = collapsed.strip("_")

    clipped = trimmed[:max_len].rstrip("_")
    if not clipped:
        return "empty_prompt"
    return clipped

def save_horizon_run_from_prompt(prompt_text: str, batch, metrics):
    """
    Persist one sampled batch and its metrics in a fresh run directory.

    The directory is created under BASE_RUN_DIR and named
    "<slugified prompt>_<YYYYmmdd_HHMMSS>", e.g. the_bat_is_in_20251218_121530.
    It receives two files: "batch.npz" (via save_batch_npz) and
    "metrics.json" (via save_metrics_json, with ``batch.meta`` passed as
    ``batch_meta``).

    Args:
        prompt_text: Prompt the batch was sampled from; used for the directory name.
        batch: Sampled batch object; must expose a ``meta`` attribute.
        metrics: Metrics object to serialise next to the batch.

    Returns:
        The path of the run directory that was written.
    """
    os.makedirs(BASE_RUN_DIR, exist_ok=True)

    slug = slugify_prompt(prompt_text)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_id = f"{slug}_{timestamp}"   # e.g. the_bat_is_in_20251218_121530
    run_dir = os.path.join(BASE_RUN_DIR, run_id)
    os.makedirs(run_dir, exist_ok=True)

    save_batch_npz(batch, os.path.join(run_dir, "batch.npz"))
    save_metrics_json(metrics, os.path.join(run_dir, "metrics.json"), batch_meta=batch.meta)

    # The arrow was mojibake ("â†’": UTF-8 bytes of "→" decoded as cp1252).
    print(f"Saved → {run_dir}")
    return run_dir

def run_prompts_and_save(model, prompts, cfg):
    """
    Sample, score, and save one run per prompt, in the order given.

    For each prompt: sample_chain(model, prompt, cfg) produces the batch,
    compute_horizon_metrics(batch) produces its metrics, and both are handed
    to save_horizon_run_from_prompt.
    """
    for prompt_text in prompts:
        sampled_batch = sample_chain(model, prompt_text, cfg)
        horizon_metrics = compute_horizon_metrics(sampled_batch)
        save_horizon_run_from_prompt(prompt_text, sampled_batch, horizon_metrics)

# Run everything: sample each prompt in `prompts` with the shared `cfg` and write
# one run directory per prompt (a "Saved" line is printed after each one).
run_prompts_and_save(model, prompts, cfg)


Mounted at /content/drive
Saved → /content/drive/MyDrive/protoInterpretation-runs/a_bat_is_20260112_193014
Saved → /content/drive/MyDrive/protoInterpretation-runs/the_woman_in_the_blue_dress_20260112_193030
Saved → /content/drive/MyDrive/protoInterpretation-runs/i_saw_20260112_193043


KeyboardInterrupt: 