<a href="https://colab.research.google.com/github/kmeng01/rome/blob/main/notebooks/rome.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" align="left"/></a>&nbsp;or in a local notebook.

In [1]:
import os
# Expose only physical GPU 2 to this process; CUDA renumbers visible devices,
# so inside this kernel it appears as device 0 ("cuda" / "cuda:0").
# This must be set before any library initializes CUDA for it to take effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

In [2]:
# Session flags: IS_COLAB records whether this kernel runs on Google Colab,
# ALL_DEPS records whether the optional extra dependencies were installed.
IS_COLAB, ALL_DEPS = False, False
try:
    # `google.colab` can only be imported on Colab; when any of these imports
    # is missing we fall through to the handler and stay in local mode.
    import google.colab
    import torch
    import os

    IS_COLAB = True
    # On Colab, switch the working directory to /content/rome.
    os.chdir("/content/rome")
    if not torch.cuda.is_available():
        raise Exception("Change runtime type to include a GPU.")
except ModuleNotFoundError:
    # Local run: keep IS_COLAB False and continue.
    pass

# Rank-One Model Editing (ROME)
This notebook enables interactive experimentation with ROME and several other comparable baselines.
The goal is to write new facts (e.g. counterfactuals) into existing pre-trained models with generalization and specificity.

In [3]:
# Enable IPython's autoreload extension in mode 2 so edits to imported project
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from util import nethook
from util.generate import generate_interactive, generate_fast

from experiments.py.demo import *

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# MODEL_NAME = "Qwen/Qwen2-0.5B"  # gpt2-{medium,large,xl} or EleutherAI/gpt-j-6B


In [6]:
# model, tok = (
#     AutoModelForCausalLM.from_pretrained(MODEL_NAME, low_cpu_mem_usage=IS_COLAB).to(
#         "cuda"
#     ),
#     AutoTokenizer.from_pretrained(MODEL_NAME),
# )
# tok.pad_token = tok.eos_token

# MODEL_NAME = "gpt2-xl"#"Qwen/Qwen2-0.5B"  # gpt2-{medium,large,xl} or EleutherAI/gpt-j-6B
# model2, tok2 = (
#     AutoModelForCausalLM.from_pretrained(MODEL_NAME, low_cpu_mem_usage=IS_COLAB).to(
#         "cuda"
#     ),
#     AutoTokenizer.from_pretrained(MODEL_NAME),
# )

# MODEL_NAME = "EleutherAI/gpt-j-6B"  # gpt2-{medium,large,xl} or EleutherAI/gpt-j-6B
# modelj, tokj = (
#     AutoModelForCausalLM.from_pretrained(MODEL_NAME, low_cpu_mem_usage=IS_COLAB).to(
#         "cuda"
#     ),
#     AutoTokenizer.from_pretrained(MODEL_NAME),
# )


In [7]:
# print(model)
# print("#########################################################################################")
# print(model2)
# print("#########################################################################################")
# print(modelj)

A requested rewrite can be specified using `request`. The `generation_prompts` are fed to the model both before and after the rewrite to assess emergent post-rewrite behavior. See the bottom of this notebook for more examples.


In [8]:
# The fact to write into the model. "{}" in the prompt is the slot that the
# subject is substituted into; `target_new` carries the new completion string.
request = [
    dict(
        prompt="{} was the founder of",
        subject="Steve Jobs",
        target_new={"str": "Microsoft"},
    ),
]

# Prompts sampled from the model before and after the rewrite to see how the
# edit changes generated text about the subject.
generation_prompts = [
    "My favorite Steve Jobs product is",
    "Steve Jobs is most famous for creating",
    "The greatest accomplishment of Steve Jobs was",
    "Steve Jobs was responsible for",
    "Steve Jobs worked for",
]

In [9]:
def _add_gpt2_style_config_aliases(config):
    """Expose GPT-2 style size attributes on configs that lack them.

    Copies `max_position_embeddings` to `n_positions` and `hidden_size` to
    `n_embd`, but only when the alias is missing and the source attribute
    exists. Existing attributes are never overwritten, so configs that already
    define the GPT-2 names are left untouched.

    NOTE(review): presumably the editing code reads `n_positions` / `n_embd`
    from the config -- confirm against the ROME implementation.
    """
    for alias, source in (
        ("n_positions", "max_position_embeddings"),
        ("n_embd", "hidden_size"),
    ):
        if not hasattr(config, alias) and hasattr(config, source):
            setattr(config, alias, getattr(config, source))


# Model to edit: either a short alias ("q2", "g2xl", "gj") or any full
# Hugging Face model id, which is passed through unchanged.
MODEL_NAME = "Qwen/Qwen2-0.5B"
MODEL_NAME = {
    "q2": "Qwen/Qwen2-0.5B",
    "g2xl": "gpt2-xl",
    "gj": "EleutherAI/gpt-j-6B",
}.get(MODEL_NAME, MODEL_NAME)

# Load the model onto the GPU together with its tokenizer.
model, tok = (
    AutoModelForCausalLM.from_pretrained(MODEL_NAME, low_cpu_mem_usage=IS_COLAB).to(
        "cuda"
    ),
    AutoTokenizer.from_pretrained(MODEL_NAME),
)
# Reuse the end-of-sequence token as the padding token.
tok.pad_token = tok.eos_token

# Name of the editing algorithm handed to `demo_model_editing`.
ALG_NAME = "ROME_MODIFIED"

# Previously the aliases were added only for the exact id "Qwen/Qwen2-0.5B";
# adding them whenever they are missing covers other checkpoints whose configs
# use `max_position_embeddings` / `hidden_size` instead of the GPT-2 names.
_add_gpt2_style_config_aliases(model.config)

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


In [None]:
# Display the `max_position_embeddings` value from the loaded model's config.
model.config.max_position_embeddings

131072

: 

In [None]:
# Restore fresh copy of model
# If an earlier run of this cell left `orig_weights` behind, write each saved
# value back in place into the tensor that `nethook.get_parameter(model, k)`
# returns (presumably the parameter named `k` -- confirm in util/nethook).
# On the first run `orig_weights` does not exist yet; the resulting NameError
# is caught and reported instead of aborting the cell.
# NOTE(review): the handler catches any NameError raised in the try body, so an
# undefined `model` or `nethook` would also be reported as "no weights".
try:
    with torch.no_grad():
        for k, v in orig_weights.items():
            nethook.get_parameter(model, k)[...] = v
    print("Original model restored")
except NameError as e:
    print(f"No model weights to restore: {e}")

# Colab-only: install deps for MEND* and KE*
# Runs only while ALL_DEPS is still False and ALG_NAME contains "MEND" or "KE";
# ALL_DEPS is set afterwards so the install is not repeated. The pip output is
# appended to /content/install.log rather than shown in the cell.
if IS_COLAB and not ALL_DEPS and any(x in ALG_NAME for x in ["MEND", "KE"]):
    print("Installing additional dependencies required for MEND and KE")
    !pip install -r /content/rome/scripts/colab_reqs/additional.txt >> /content/install.log 2>&1
    print("Finished installing")
    ALL_DEPS = True

# Execute rewrite
# Applies the `request` edit with the algorithm named by ALG_NAME. The second
# returned value is bound to `orig_weights`, which the restore step at the top
# of this cell reads on the next run. Judging by the names, the first value is
# the edited model -- confirm against `demo_model_editing`.
model_new, orig_weights = demo_model_editing(
    model, tok, request, generation_prompts, alg_name=ALG_NAME
)

No model weights to restore: name 'orig_weights' is not defined

##############################################
#                                            #
#  Retrieving ROME_MODIFIED hyperparameters  #
#                                            #
##############################################
Loading from hparams/ROME/gpt2-xl.json
ROMEHyperParams(layers=[13], fact_token='subject_last', v_num_grad_steps=20, v_lr=0.5, v_loss_layer=47, v_weight_decay=0.5, clamp_norm_factor=4, kl_factor=0.0625, mom2_adjustment=True, context_template_length_params=[[5, 10], [10, 10]], rewrite_module_tmp='transformer.h.{}.mlp.c_proj', layer_module_tmp='transformer.h.{}', mlp_module_tmp='transformer.h.{}.mlp', attn_module_tmp='transformer.h.{}.attn', ln_f_module='transformer.ln_f', lm_head_module='transformer.wte', mom2_dataset='wikipedia', mom2_n_samples=100000, mom2_dtype='float32')

################################
#                              #
#  Generating pre-update text  #
#                   

  0%|          | 0/1000 [00:00<?, ?it/s]


Left vector shape: torch.Size([6400])


This SAE has non-empty model_from_pretrained_kwargs. 
For optimal performance, load the model like so:
model = HookedSAETransformer.from_pretrained_no_processing(..., **cfg.model_from_pretrained_kwargs)


Computing right vector (v)
Lookup index found: 1 | Sentence: Steve Jobs was the founder of | Token:  Jobs
Rewrite layer is 13
Tying optimization objective to 47
Recording initial value of v*
#############################################################################################################
ITERATION 1
loss 7.192 = 7.192 + 0.0 + 0.0 avg prob of [ Microsoft] 0.0009119583410210907
Number of actives featurs in delta: Count: 0, Percentage: 0.00%, Mean change value: UNDEFINED
#############################################################################################################
#############################################################################################################
ITERATION 2
loss 3.254 = 0.664 + 0.5 + 0.09 avg prob of [ Microsoft] 0.5865025520324707
Number of actives featurs in delta: Count: 14282, Percentage: 55.79%, Mean change value: 0.49996405839920044
##################################################################################################