# Editing a T5 QA model with GRACE

In [1]:
import grace
from grace.editors import GRACE_barebones as GRACE
from grace.utils import tokenize_qa
import torch
import copy
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

In [2]:
# Load the pretrained seq2seq model and its tokenizer from the same checkpoint.
checkpoint_name = "google/t5-small-ssm-nq"
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint_name)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)

In [3]:
# --- GRACE settings ---
# Which layer to edit?
layer_to_edit = "encoder.block[4].layer[1].DenseReluDense.wo"
# Initial epsilon for GRACE codebook entries
init_epsilon = 3.0
# Learning rate with which to learn new GRACE values
learning_rate = 1.0

# Use the GPU when torch reports one is available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model = model.to(device)

# Independent copy of the model; later cells query it for the
# "Before Editing" predictions.
original_model = copy.deepcopy(model)

In [4]:
# --- Wrap the model with GRACE ---
# Arguments are passed positionally in the order the original call used:
# model, layer name, initial epsilon, learning rate, device.
edited_model = GRACE(
    model,
    layer_to_edit,
    init_epsilon,
    learning_rate,
    device,
    generation=False,
)

In [10]:
# --- Desired edit: one question and the answer we want the model to give ---
edit_input = {
    "text": ["How tall is the empire state building?"],
    "labels": ["1,454 feet"],
}
edit_tokens = tokenize_qa(edit_input, tokenizer, device)

# --- Baseline: ask the untouched copy of the model before any edit is applied ---
baseline_output = original_model.generate(edit_tokens["input_ids"])
preds = baseline_output.squeeze()
original_answer = tokenizer.decode(preds, skip_special_tokens=True)
print(f"Before Editing. Question: {edit_input['text'][0]}. Answer: {original_answer}")

Before Editing. Question: How tall is the empire state building?. Answer: 71 ft


In [6]:
# --- Apply the edit ---
# Hand the tokenized edit example (built from `edit_input` by `tokenize_qa`)
# to the GRACE wrapper's `edit` method.
# NOTE(review): presumably this updates GRACE's state for `layer_to_edit` in place — confirm in grace.editors.
edited_model.edit(edit_tokens)

In [7]:
# --- Ask the GRACE-wrapped model the edited question again, now that the edit is applied ---
edited_output = edited_model.generate(edit_tokens["input_ids"])
preds = edited_output.squeeze()
new_answer = tokenizer.decode(preds, skip_special_tokens=True)
print(f"After Editing. Question: {edit_input['text'][0]}. Answer: {new_answer}")

After Editing. Question: How tall is the empire state building?. Answer: 1,454 feet


In [8]:
# --- Trying slightly different input text ---
# Same target answer as the edit, but the question is worded differently.
test_input = {
    "text": ["how high is the empire state building?"],
    "labels": ["1,454 feet"],
}
test_tokens = tokenize_qa(test_input, tokenizer, device)

# Query the GRACE-wrapped model with the reworded question.
reworded_output = edited_model.generate(test_tokens["input_ids"], max_length=20)
preds = reworded_output.squeeze()
new_answer = tokenizer.decode(preds, skip_special_tokens=True)
print(f"After Editing. Question: {test_input['text'][0]}. Answer: {new_answer}")

After Editing. Question: how high is the empire state building?. Answer: 57 ft


In [9]:
# --- Compare the original and the edited model on an input unrelated to the edit ---
unrelated_input = {
    "text": ["How tall is the eiffel tower?"],
    "labels": ["1,083 feet"],
}
unrelated_tokens = tokenize_qa(unrelated_input, tokenizer, device)
unrelated_ids = unrelated_tokens["input_ids"]

# Original (unedited) copy first. `new_answer` is reused for both printouts,
# so at this point it holds the original model's answer.
original_output = original_model.generate(unrelated_ids)
preds = original_output.squeeze()
new_answer = tokenizer.decode(preds, skip_special_tokens=True)
print(f"Before Editing. Question: {unrelated_input['text'][0]}. Answer: {new_answer}")

# Then the GRACE-wrapped model on the same token ids.
edited_output = edited_model.generate(unrelated_ids)
preds = edited_output.squeeze()
new_answer = tokenizer.decode(preds, skip_special_tokens=True)
print(f"After Editing. Question: {unrelated_input['text'][0]}. Answer: {new_answer}")

Before Editing. Question: How tall is the eiffel tower?. Answer: 157 ft
After Editing. Question: How tall is the eiffel tower?. Answer: 157 ft
