In [1]:
%load_ext autoreload
%autoreload 2
from transformer_lens import HookedTransformer
from orthogonalized_model import OrthogonalizedTransformer, generate_weight_order
import torch
from concept_erasure import LeaceEraser
from tasks.facts.SportsTask import SportsTask
from tasks import PileTask
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import get_peft_model, LoraConfig, TaskType

In [2]:
def do_relearning(model, train_tasks, n_iters, lora_kwargs={'rank': 64, 'alpha': 32, 'dropout': 0, 'target_modules': 'all-linear'}, learning_kwargs={'lr': 1e-4, 'weight_decay': 0, 'use_cosine': False}, eval_task=None):
    """Attach a LoRA adapter to ``model`` and fine-tune it on ``train_tasks``.

    Args:
        model: Model handed to ``get_peft_model``; the wrapped model is moved
            to CUDA before training.
        train_tasks: Mapping whose values are ``(task, task_weight)`` pairs.
            Each task must expose ``get_train_loss(model)``; the returned loss
            is multiplied by ``task_weight`` before ``backward()``.
        n_iters: Number of optimizer steps to take.
        lora_kwargs: LoRA settings read by key: ``'rank'``, ``'alpha'``,
            ``'dropout'`` and ``'target_modules'``.
        learning_kwargs: Keyword arguments for ``torch.optim.AdamW``. The
            optional ``'use_cosine'`` entry is not forwarded to the optimizer;
            when truthy, a ``CosineAnnealingLR`` schedule with
            ``T_max=n_iters`` is stepped once per iteration. A missing
            ``'use_cosine'`` is treated as ``False``.
        eval_task: Optional object exposing ``get_eval_acc(model)``. When
            given, it is called after every optimizer step.

    Returns:
        A list with one ``eval_task.get_eval_acc(model)`` result per
        iteration, or an empty list when ``eval_task`` is ``None``.
    """
    # 'use_cosine' selects the scheduler and is not an AdamW argument; AdamW
    # raises TypeError on unknown keywords. Work on a copy so neither the
    # caller's dict nor the shared default dict is ever mutated.
    optimizer_kwargs = dict(learning_kwargs)
    use_cosine = optimizer_kwargs.pop('use_cosine', False)

    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=lora_kwargs['rank'],
        lora_alpha=lora_kwargs['alpha'],
        lora_dropout=lora_kwargs['dropout'],
        target_modules=lora_kwargs['target_modules'],
    )

    eval_accs = []

    model = get_peft_model(model, peft_config).cuda()

    optimizer = torch.optim.AdamW(model.parameters(), **optimizer_kwargs)

    scheduler = None
    if use_cosine:
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=n_iters)

    for _ in range(n_iters):
        optimizer.zero_grad()
        # Gradients from every task accumulate before the single optimizer
        # step, so the update follows the weighted sum of the task losses.
        for task, task_weight in train_tasks.values():
            loss = task.get_train_loss(model) * task_weight
            loss.backward()

        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        if eval_task is not None:
            eval_accs.append(eval_task.get_eval_acc(model))

    return eval_accs

In [3]:
# Identifier used to fetch the tokenizer.
MODEL_NAME = "google/gemma-7b"
device = "cuda"

# Tokenizer for MODEL_NAME, configured to pad on the right.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.padding_side = "right"

# Causal LM loaded from "orthogonalized_model" (presumably a local checkpoint
# directory -- confirm), then moved onto the target device.
model = AutoModelForCausalLM.from_pretrained("orthogonalized_model")
model = model.to(device)

Gemma's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loaded pretrained model google/gemma-7b into HookedTransformer
Moving model to device:  cuda


In [4]:
# Relearning budget: number of optimizer steps and number of athletes used.
n_relearn_iters = 20
n_relearn_athletes = 2

eval_batch_size = 64
train_batch_size = 64

# Task built from the basketball forget subset, limited to
# n_relearn_athletes players, with train/test splitting disabled.
relearn_sport = SportsTask(
    batch_size=n_relearn_athletes,
    tokenizer=tokenizer,
    forget_sport_subset={"basketball"},
    forget_player_subset=n_relearn_athletes,
    train_test_split=False,
    is_forget_dataset=True,
)

# Pile task for the second training objective.
train_pile = PileTask(
    batch_size=train_batch_size,
    tokenizer=tokenizer,
    device=device,
    ctx_length=256,
    shuffle=True,
    buffer_size=50000,
)

# label -> (task, loss weight)
train_tasks = {
    "relearn_athletes": (relearn_sport, 0.2),
    "pile": (train_pile, 1),
}

# Evaluation task over the basketball forget subset.
forget_sport_eval = SportsTask(
    batch_size=eval_batch_size,
    tokenizer=tokenizer,
    device=device,
    prep_acdcpp=False,
    criterion="cross_entropy",
    forget_sport_subset={"basketball"},
    is_forget_dataset=True,
)

Downloading readme:   0%|          | 0.00/776 [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/30 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/30 [00:00<?, ?it/s]

In [5]:
# do_relearning unpacks every value of train_tasks as a (task, weight) pair,
# so the mapping must be label -> (task, weight); a bare weight as the value
# fails on unpacking. Pass the `train_tasks` dict from the setup cell instead
# to also mix in the Pile objective.
eval_accs = do_relearning(
    model,
    train_tasks={"relearn_athletes": (relearn_sport, 1)},
    n_iters=n_relearn_iters,
    eval_task=forget_sport_eval,
)

ValueError: Only instances of PreTrainedModel support `target_modules='all-linear'`

In [None]:
import matplotlib.pyplot as plt

# eval_accs holds one evaluation result per relearning iteration, so the
# x-axis is the iteration index.
fig, ax = plt.subplots()
ax.plot(eval_accs)
ax.set_xlabel("Relearning iteration")
ax.set_ylabel("Eval accuracy")
ax.set_title("Forget-set accuracy during relearning")
plt.show()