In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import init_empty_weights
import torch

device = "cuda"


model_name = "HuggingFaceH4/zephyr-7b-beta"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 8/8 [00:03<00:00,  2.64it/s]


In [3]:
from src.dataset import JsonlDataset

# TODO: I'm unsure about min_len but I can figure this out later maybe.
cyber_forget = JsonlDataset(
      tokenizer=tokenizer, tokenizer_max_length=1024, batch_size=1,
      min_len=30, dataset_name="cyber-forget-corpus.jsonl", dataset_folder="data/", device=device
    )

cyber_forget._load_dataset()

In [4]:
inputs = tokenizer(cyber_forget.data[0]["text"], return_tensors="pt", padding=True, truncation=True, max_length=1024).to(device)
output = model(**inputs)
print(output)

CausalLMOutputWithPast(loss=None, logits=tensor([[[-5.8985, -5.8302, -0.2193,  ..., -4.2019, -3.7009, -3.9271],
         [-7.4478, -7.6959, -1.0112,  ..., -6.6283, -3.7932, -4.8868],
         [-7.7416, -7.9730,  0.3227,  ..., -6.5928, -6.4520, -7.3018],
         ...,
         [-8.7295, -7.7559,  6.7179,  ..., -7.1722, -6.6395, -3.9135],
         [-4.4445, -4.2029,  1.9865,  ..., -4.3597, -1.1242, -2.7143],
         [-9.2293, -8.7453,  9.6163,  ..., -6.0335, -7.8609, -4.8814]]],
       device='cuda:0', grad_fn=<UnsafeViewBackward0>), past_key_values=<transformers.cache_utils.DynamicCache object at 0x760f6a6d1fd0>, hidden_states=None, attentions=None)


In [5]:
print(output.logits.shape)

torch.Size([1, 1024, 32000])


In [6]:
# import torch
# torch.cuda.empty_cache()

In [7]:
print(cyber_forget[0])
print(cyber_forget[0]["input_ids"].shape)

{'input_ids': tensor([    1, 15549,   352,  ..., 28781, 15834,  6291], device='cuda:0'), 'attention_mask': tensor([1, 1, 1,  ..., 1, 1, 1], device='cuda:0')}
torch.Size([1024])


In [8]:
tokenized_inputs = tokenizer(cyber_forget.data[0]["text"], return_tensors="pt", padding=True, truncation=True, max_length=1024).to(device)
model(**tokenized_inputs)

CausalLMOutputWithPast(loss=None, logits=tensor([[[-5.8985, -5.8302, -0.2193,  ..., -4.2019, -3.7009, -3.9271],
         [-7.4478, -7.6959, -1.0112,  ..., -6.6283, -3.7932, -4.8868],
         [-7.7416, -7.9730,  0.3227,  ..., -6.5928, -6.4520, -7.3018],
         ...,
         [-8.7295, -7.7559,  6.7179,  ..., -7.1722, -6.6395, -3.9135],
         [-4.4445, -4.2029,  1.9865,  ..., -4.3597, -1.1242, -2.7143],
         [-9.2293, -8.7453,  9.6163,  ..., -6.0335, -7.8609, -4.8814]]],
       device='cuda:0', grad_fn=<UnsafeViewBackward0>), past_key_values=<transformers.cache_utils.DynamicCache object at 0x760f6a6c7bf0>, hidden_states=None, attentions=None)

In [9]:
import torch

class Model():
  def __init__(self, model, tokenizer, device, seed = 42):
    self.model = model.to(device)
    self.tokenizer = tokenizer
    self.device = device
    self.seed = seed
    torch.manual_seed(seed)
    self.activations = {}

  def hook_fn(self, module, input, output):
    self.activations["transformer_block_output"] = output[0].detach()
  
  def forward(self, inputs, layer_idx: int):
    if layer_idx >= len(self.model.model.layers):
      raise ValueError(f"Layer index {layer_idx} is out of bounds for the model. The model has {len(self.model.transformer.h)} layers.")
    try:
      hook = self.model.model.layers[layer_idx].register_forward_hook(self.hook_fn)
      tokenized_inputs = self.tokenizer(inputs, return_tensors="pt").to(self.device)
      # print(inputs["input_ids"].shape)
      with torch.no_grad():
        _ = self.model(**tokenized_inputs)
    finally:
      hook.remove()
    return self.activations["transformer_block_output"]

In [10]:
test_u = torch.randn(1024, 768)
test_u = test_u / torch.linalg.norm(test_u, dim=-1, keepdim=True)
print(test_u.shape)
print(test_u)

torch.Size([1024, 768])
tensor([[-0.0369,  0.0372, -0.0026,  ..., -0.0416, -0.0009,  0.0819],
        [-0.0140,  0.0459, -0.0427,  ..., -0.0310,  0.0112, -0.0398],
        [-0.0125,  0.0393,  0.0028,  ..., -0.0679,  0.0845,  0.0271],
        ...,
        [ 0.0236, -0.0232,  0.0117,  ...,  0.0443, -0.0140,  0.0333],
        [ 0.0090,  0.0066,  0.0128,  ..., -0.0061, -0.0057, -0.0025],
        [ 0.0112,  0.0058,  0.0366,  ..., -0.0680, -0.0136, -0.0345]])


In [13]:
import torch
import numpy as np
from torch.utils.data import DataLoader
from src.dataset import JsonlDataset
import tqdm
import copy

class RMU:
  def __init__(self, model, tokenizer, datasets, device, alpha, lr, c, hidden_dimension_size, tokenizer_max_length, min_len, layer_idx, seed = 42):
    self.unlearned_model = Model(model, tokenizer, device, seed)
    self.frozen_model = copy.deepcopy(self.unlearned_model)
    self.tokenizer = tokenizer
    self.datasets = datasets
    self.device = device
    self.alpha = alpha
    self.lr = lr
    self.c = c
    self.tokenizer_max_length = tokenizer_max_length
    self.min_len = min_len
    self.seed = seed
    self.hidden_dimension_size = hidden_dimension_size
    self.layer_idx = layer_idx
    np.random.seed(seed)
    torch.manual_seed(seed)
    
    self.freeze_layers_in_unlearned_model([self.layer_idx-2, self.layer_idx-1, self.layer_idx])

    # Initialize random unit vector u
    some_big_number = 15000
    u = torch.randn(some_big_number, self.hidden_dimension_size).to(self.device)
    u = u / torch.linalg.norm(u, dim=-1, keepdim=True).to(self.device)
    self.u = u

  def freeze_layers_in_unlearned_model(self, unfreeze_layers: list[int]):
    # Validation
    for layer in unfreeze_layers:
      assert layer >= 0 and layer < len(self.unlearned_model.model.model.layers)

    # Freeze all layers first
    for param in self.unlearned_model.model.parameters():
      param.requires_grad = False

    # Unfreeze the specified layers
    for layer in unfreeze_layers:
      for param in self.unlearned_model.model.model.layers[layer].parameters():
        param.requires_grad = True
    

  def retain_loss(self, act_retain, act_forget):
    l2_squared = torch.sum((act_retain - act_forget) ** 2, dim=-1)
    final = torch.mean(l2_squared)
    return final

  def forget_loss(self, act_updated):
    print(act_updated.shape)
    print(self.u.shape)
    print(self.u[:len(act_updated[0]), :].shape)
    l2_squared = torch.sum((act_updated - self.c * self.u[:len(act_updated[0]), :]) ** 2, dim=-1)
    final = torch.mean(l2_squared)
    return final

  def rmu_step(self, d_forget, d_retain, layer_idx):
    print("Beginning RMU step...")
    cyber_forget = JsonlDataset(
      tokenizer=self.tokenizer, tokenizer_max_length=self.tokenizer_max_length, batch_size=1,
      min_len=self.min_len, dataset_name="cyber-forget-corpus.jsonl", dataset_folder="data/", device=self.device
    )
    cyber_forget._load_dataset()
    cyber_retain = JsonlDataset(
      tokenizer=self.tokenizer, tokenizer_max_length=self.tokenizer_max_length, batch_size=1,
      min_len=self.min_len, dataset_name="cyber-retain-corpus.jsonl", dataset_folder="data/", device=self.device
      )
    cyber_retain._load_dataset()

    # Retain loss
    for i in tqdm.tqdm(range(len(cyber_retain.data))):
      print(len(cyber_retain.data[i]["text"]))
      act_updated = self.unlearned_model.forward(cyber_retain.data[i]["text"], layer_idx)
      with torch.no_grad():
        act_frozen = self.frozen_model.forward(cyber_retain.data[i]["text"], layer_idx)
      retain_loss = self.retain_loss(act_updated, act_frozen)
      print(retain_loss)
      break

    # Forget loss
    for i in tqdm.tqdm(range(len(cyber_forget.data))):
      act_updated = self.unlearned_model.forward(cyber_forget.data[i]["text"], layer_idx)
      forget_loss = self.forget_loss(act_updated)
      print(forget_loss)
      break

    full_loss = forget_loss + self.alpha * retain_loss
    optimizer = torch.optim.AdamW(self.unlearned_model.model.parameters(), lr=self.lr)
    optimizer.zero_grad()
    full_loss.backward()
    optimizer.step()

    print("Finished RMU step...")

In [12]:
my_rmu = RMU(model, tokenizer, [], device, 0.01, 0.001, 1, 4096, 1024, 30, 42)
my_rmu.rmu_step(d_forget=None, d_retain=None, layer_idx=10)

OutOfMemoryError: CUDA out of memory. Tried to allocate 500.00 MiB. GPU 0 has a total capacity of 79.19 GiB of which 231.06 MiB is free. Including non-PyTorch memory, this process has 78.96 GiB memory in use. Of the allocated memory 78.35 GiB is allocated by PyTorch, and 25.43 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [10]:
my_model = Model(model, tokenizer, device, 42)
output = my_model.forward(cyber_forget.data[0]["text"], 10)

In [13]:
print(cyber_forget[0]["input_ids"].shape)
print(output)
print(output.shape)

torch.Size([1024])
tensor([[[-0.1184, -0.2648, -0.1554,  ..., -0.0136,  0.0858, -0.0389],
         [ 0.0420,  0.0390,  0.0470,  ...,  0.0355, -0.0068, -0.0307],
         [-0.0149, -0.0296, -0.0170,  ...,  0.0219,  0.0518,  0.0417],
         ...,
         [ 0.0162, -0.0176,  0.0176,  ...,  0.0365, -0.0607, -0.0196],
         [-0.0113, -0.0102,  0.0417,  ...,  0.0305,  0.0347,  0.0183],
         [-0.0059,  0.0050, -0.0029,  ...,  0.0445, -0.0131, -0.0024]]],
       device='cuda:0')
torch.Size([1, 13157, 4096])
