In [1]:
# https://github.com/Suzehva/time_in_language_models_current/blob/main/ii_accuracy/ii_accuracy.py

In [2]:
import numpy as np
import random
import torch

def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    The same seed is applied, in order, to Python's ``random`` module,
    NumPy's global RNG, torch's default generator, and torch's CUDA
    generators.

    Args:
        seed: Seed value handed unchanged to each library.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_rng in seeders:
        seed_rng(seed)

# Seed all RNGs once, before anything else in the notebook runs.
set_seed(0)

# Use CUDA when torch reports it as available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/nlp/scr/aditijb/miniconda3/envs/time-env/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/nlp/scr/aditijb/miniconda3/envs/time-env/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/nlp/scr/aditijb/miniconda3/envs/time-env/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/nlp/scr/aditijb/miniconda3/envs/time-env/lib/python3.10/site-packages/traitlets/con

## Create all prompts where interchanging should cause the model to switch tenses (past/present/future)

In [3]:
# from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# --- Model selection ---------------------------------------------------
model_id = "allenai/OLMo-2-0425-1B"
revision = None  # no revision is pinned

# --- Storage locations (both derived from USER) ------------------------
USER = 'aditijb'
DATA_DIR, MODEL_DIR = f'/nlp/scr/{USER}/data', f'/nlp/scr/{USER}/models'


## Heatmap with one prompt
    

In [4]:
import pandas as pd
import pyvene
from pyvene import embed_to_distrib, top_vals, format_token
from pyvene import RepresentationConfig, IntervenableConfig, IntervenableModel
from pyvene import VanillaIntervention

# Configure the inline plotting backend to emit figures in SVG format.
%config InlineBackend.figure_formats = ['svg']
from plotnine import (
    ggplot,
    geom_tile,
    aes,
    facet_wrap,
    theme,
    element_text,
    geom_bar,
    geom_hline,
    scale_y_log10,
)

# Load the model through pyvene; the call's result is unpacked into the model
# config, the tokenizer, and the model itself. The checkpoint name is taken
# from `model_id` (set in the configuration cell) instead of repeating the
# same string here, so changing the model in one place actually takes effect.
config, tokenizer, olmo = pyvene.create_olmo2(name=model_id)


AttributeError: module 'transformers.models' has no attribute 'llava'

In [None]:

def simple_position_config(model_type, component, layer):
    """Build a pyvene config for one position-level vanilla intervention.

    Args:
        model_type: Passed through as the ``model_type`` of the
            ``IntervenableConfig``; the caller in this notebook passes
            ``type(olmo)``.
        component: Name of the component to intervene on
            (e.g. ``"block_output"``).
        layer: Index of the layer to intervene on.

    Returns:
        An ``IntervenableConfig`` holding a single ``RepresentationConfig``
        and using ``VanillaIntervention`` as its intervention type.
    """
    # Positional fields: layer, component, intervention unit, max number of units.
    target = RepresentationConfig(layer, component, "pos", 1)
    return IntervenableConfig(
        model_type=model_type,
        representations=[target],
        intervention_types=VanillaIntervention,
    )


# Base prompt and its single counterfactual source; the two differ only in the year.
base = tokenizer("In 1981 there", return_tensors="pt")
sources = [tokenizer("In 2023 there", return_tensors="pt")]

# The sweep below uses one position index for both prompts, so they must
# tokenize to the same length. Fail loudly here instead of producing a
# misaligned heatmap later.
assert base.input_ids.shape == sources[0].input_ids.shape, (
    "base and source prompts must tokenize to the same number of tokens "
    "for a position-aligned interchange"
)

# Candidate next tokens whose probability is tracked. Special tokens are
# excluded explicitly so that only the two tense words are recorded.
tokens = tokenizer.encode(" was will", add_special_tokens=False)

# Component being swapped; also recorded in every output row.
component = "block_output"
data = []

# Inference only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    for layer_i in range(olmo.config.num_hidden_layers):
        # NOTE: this rebinds the notebook-level name `config`, which the
        # model-loading cell also assigns.
        config = simple_position_config(type(olmo), component, layer_i)
        intervenable = IntervenableModel(config, olmo)
        for pos_i in range(len(base.input_ids[0])):
            # Run the base prompt with the source representation swapped in
            # at (layer_i, pos_i).
            _, counterfactual_outputs = intervenable(
                base, sources, {"sources->base": pos_i}
            )
            logits = counterfactual_outputs.logits
            distrib = torch.softmax(logits, dim=-1)

            # Base-prompt token sitting at the intervened position.
            intervention_token_id = base.input_ids[0][pos_i].item()
            intervention_token = format_token(tokenizer, intervention_token_id)

            for token in tokens:
                data.append(
                    {
                        "token": format_token(tokenizer, token),
                        # Probability at the last sequence position of batch item 0.
                        "prob": float(distrib[0][-1][token]),
                        "layer": f"f{layer_i}",
                        "pos": pos_i,
                        "intervention_token": intervention_token,
                        "type": component,
                    }
                )

# One row per (layer, position, tracked token).
df = pd.DataFrame(data)

In [None]:
import torch
import pandas as pd
from plotnine import ggplot, aes, geom_tile, facet_wrap, theme, element_text, labs, options

# Give the layer axis an explicit order: one category per model layer, f0 first.
layer_nodes = [f"f{idx}" for idx in range(olmo.config.num_hidden_layers)]
df["layer"] = pd.Categorical(df["layer"], categories=layer_nodes, ordered=True)

# Facet variable: one panel per tracked token.
df["token"] = df["token"].astype("category")

# Human-readable token strings for the base prompt and the source prompt.
base_ids = base.input_ids[0]
source_ids = sources[0].input_ids[0]
base_tokens = [format_token(tokenizer, tok_id.item()) for tok_id in base_ids]
source_tokens = [format_token(tokenizer, tok_id.item()) for tok_id in source_ids]

# X-axis label per position: the base token alone when both prompts agree,
# otherwise "base <- source" to show which token was swapped in.
x_labels = [
    b_tok if b_tok == s_tok else f"{b_tok} <- {s_tok}"
    for b_tok, s_tok in zip(base_tokens, source_tokens)
]

# Replace the per-row intervention token with its position's label, ordered
# by position in the prompt.
df["intervention_token"] = pd.Categorical(
    [x_labels[pos] for pos in df["pos"]],
    categories=x_labels,
    ordered=True,
)

# Assemble the heatmap from named pieces.
heat_tiles = geom_tile(
    aes(x="intervention_token", y="layer", fill="prob"), raster=False
)
x_tick_theme = theme(axis_text_x=element_text(rotation=90, ha='right'))
axis_titles = labs(
    x="Intervention Token (base ← source)", y="Layer", fill="Probability"
)
g = ggplot(df) + heat_tiles + facet_wrap("~token") + x_tick_theme + axis_titles

# Global plotnine figure size, set before the plot is displayed below.
options.figure_size = (12, 6)
g
