In [1]:
%load_ext autoreload
%autoreload 2

import sys
import os
import dotenv
from pathlib import Path

env_file = "../.env"

# Pull secrets (e.g. the Hugging Face token read below) from a local .env file
# when one is present; otherwise rely on the process environment as-is.
if os.path.exists(env_file):
    dotenv.load_dotenv(env_file, verbose=True)
    print("Loaded environment variables from .env file.")


def ensure_on_sys_path(directory) -> str:
    """Append ``directory`` to ``sys.path`` unless it is already listed.

    Re-running this cell must not keep growing ``sys.path`` with duplicate
    entries, so the append is guarded by a membership check.

    Args:
        directory: Directory to make importable, as a ``str`` or path-like.

    Returns:
        The string form of ``directory`` that is present on ``sys.path``.
    """
    # sys.path entries must be plain strings; other types are ignored on import.
    entry = str(directory)
    if entry not in sys.path:
        sys.path.append(entry)
    return entry


cwd = os.getcwd()
# Make the project's `src` directory (sibling of the notebook directory) importable.
ensure_on_sys_path(Path(cwd).parent / "src")
# May be None when HUGGINGFACE_API_KEY is not set in the environment.
hf_access_token = os.getenv("HUGGINGFACE_API_KEY")

Loaded environment variables from .env file.


In [2]:
import torch
from research_tools import get_gpus_available
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import LlamaForCausalLM, LlamaTokenizer


# Restrict this process to the GPUs reported by the project helper.
# NOTE(review): this only takes effect if CUDA has not been initialised yet in
# this process -- confirm get_gpus_available() does not initialise it itself.
visible_gpus = ",".join(map(str, get_gpus_available()))
os.environ["CUDA_VISIBLE_DEVICES"] = visible_gpus

model_dtype = torch.bfloat16
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Fail fast before loading any weights: this notebook is not meant to run on CPU.
assert device.type == "cuda", "No GPU available."

model_name = "meta-llama/Meta-Llama-3-8B"

# Load the checkpoint in bfloat16 and move it onto the selected device.
model: LlamaForCausalLM = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_access_token,
    torch_dtype=model_dtype,
).to(device)

# NOTE(review): AutoTokenizer may return a different tokenizer class than the
# LlamaTokenizer annotation suggests -- confirm before relying on the type.
tokenizer: LlamaTokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=hf_access_token,
)
# Pad on the left and reuse the EOS token id as the padding id.
tokenizer.padding_side = "left"
tokenizer.pad_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [3]:
# NOTE: LoRA adapter setup, currently disabled. While this cell stays commented
# out, `model` is left unwrapped (no PEFT adapter is attached in this notebook).
# Uncomment every line below to wrap `model` with a LoRA adapter on the
# q_proj / v_proj modules.
# from peft import get_peft_model, LoraConfig


# lora_rank = 64
# lora_alpha = 8

# lora_config = LoraConfig(
#     r=lora_rank,
#     lora_alpha=lora_alpha,
#     target_modules=["q_proj", "v_proj"],
# )

# model = get_peft_model(model, lora_config)

In [4]:
from unlearn_order.dataset import load_dataset

data_dir = Path("../data/random_bd")

# Split ids on disk are 0..9; the first `n_train` feed the train set and the
# next `n_val` feed the validation set.
splits = list(range(10))
n_train = 1
n_val = 1


def _split_filename(position: int) -> str:
    """Return the JSONL file name for the split id stored at ``splits[position]``."""
    return f"split_{splits[position]}.jsonl"


train_files = list(map(_split_filename, range(n_train)))
val_files = list(map(_split_filename, range(n_train, n_train + n_val)))
combined_files = [*train_files, *val_files]

# Load the three views in the same order as before: train, val, then combined.
train_dataset, val_dataset, combined_dataset = (
    load_dataset(data_dir, file_group)
    for file_group in (train_files, val_files, combined_files)
)

In [None]:
from unlearn_order.pipeline import run_pipeline

# Hyperparameters forwarded unchanged to run_pipeline.
batch_size = 4
tolerance = 0.01
lr = 3e-6
max_epochs = 100

# Each task is (code, label, dataset), executed in list order.
# NOTE(review): codes presumably mean f = fine-tune, u = unlearn, e = evaluate --
# confirm against run_pipeline.
# NOTE(review): "eval_train" / "eval_val" appear both before and after the
# retrain step; verify the pipeline does not key results by label alone.
pipeline_tasks = [
    ("f", "combined", combined_dataset),
    ("u", "unlearn", combined_dataset),
    ("e", "eval_train", train_dataset),
    ("e", "eval_val", val_dataset),
    ("f", "retrain_train", train_dataset),
    ("e", "eval_train", train_dataset),
    ("e", "eval_val", val_dataset),
]

run_pipeline(
    model,
    tokenizer,
    pipeline_tasks,
    batch_size=batch_size,
    tolerance=tolerance,
    lr=lr,
    max_epochs=max_epochs,
)

  5%|▌         | 5/100 [01:56<39:20, 24.85s/it]

Epoch 5 loss: 0.067678130688561 acc: 0.3885350318471338


 10%|█         | 10/100 [03:52<37:13, 24.81s/it]

Epoch 10 loss: 0.024128436723391815 acc: 0.8248407643312102


 15%|█▌        | 15/100 [05:48<35:11, 24.85s/it]

Epoch 15 loss: 0.011387015568781318 acc: 0.9299363057324841


 20%|██        | 20/100 [07:44<33:07, 24.84s/it]

Epoch 20 loss: 0.007656913969284033 acc: 0.9140127388535032


 25%|██▌       | 25/100 [09:39<30:59, 24.80s/it]

Epoch 25 loss: 0.005882953856506073 acc: 0.8312101910828026


 30%|███       | 30/100 [11:35<28:57, 24.82s/it]

Epoch 30 loss: 0.0038985876173660036 acc: 0.9394904458598726


 35%|███▌      | 35/100 [13:31<26:54, 24.84s/it]

Epoch 35 loss: 0.0023566814003628863 acc: 0.9681528662420382


 40%|████      | 40/100 [15:27<24:51, 24.85s/it]

Epoch 40 loss: 0.0009545969321650565 acc: 0.9745222929936306


 45%|████▌     | 45/100 [17:23<22:43, 24.78s/it]

Epoch 45 loss: 0.001112783054808298 acc: 0.9745222929936306


 50%|█████     | 50/100 [19:19<20:40, 24.81s/it]

Epoch 50 loss: 0.0032619109701635973 acc: 0.9777070063694268


 55%|█████▌    | 55/100 [21:14<18:36, 24.82s/it]

Epoch 55 loss: 0.0010703751127183964 acc: 0.9681528662420382


 60%|██████    | 60/100 [23:10<16:31, 24.79s/it]

Epoch 60 loss: 0.00123742042164357 acc: 0.9745222929936306


 65%|██████▌   | 65/100 [25:06<14:28, 24.81s/it]

Epoch 65 loss: 0.0009849233808039756 acc: 0.9681528662420382


 70%|███████   | 70/100 [27:02<12:25, 24.85s/it]

Epoch 70 loss: 0.0006994331834283431 acc: 0.9713375796178344


 75%|███████▌  | 75/100 [28:58<10:21, 24.84s/it]

Epoch 75 loss: 0.00021909759546760588 acc: 0.9681528662420382


 80%|████████  | 80/100 [30:54<08:16, 24.80s/it]

Epoch 80 loss: 0.0030742255582088974 acc: 0.9554140127388535


 85%|████████▌ | 85/100 [32:50<06:12, 24.83s/it]

Epoch 85 loss: 0.0016160748887304394 acc: 0.9299363057324841


 90%|█████████ | 90/100 [34:46<04:08, 24.85s/it]

Epoch 90 loss: 0.0013245202110269089 acc: 0.9426751592356688


 95%|█████████▌| 95/100 [36:41<02:04, 24.82s/it]

Epoch 95 loss: 0.0008797440134965041 acc: 0.9554140127388535


100%|██████████| 100/100 [38:37<00:00, 23.18s/it]


Epoch 100 loss: 0.0005882577053598421 acc: 0.945859872611465
task f.combined done with accuracy 0.945859872611465


  5%|▌         | 5/100 [01:55<39:18, 24.83s/it]

Epoch 5 loss: 0.054431346239178044 acc: 0.34394904458598724


 10%|█         | 10/100 [03:51<37:15, 24.83s/it]

Epoch 10 loss: 0.0560900811461886 acc: 0.3248407643312102


 15%|█▌        | 15/100 [05:47<35:06, 24.78s/it]

Epoch 15 loss: 0.05488758002686652 acc: 0.3184713375796178


 20%|██        | 20/100 [07:43<33:03, 24.80s/it]

Epoch 20 loss: 0.05382795949840242 acc: 0.3057324840764331


 25%|██▌       | 25/100 [09:39<31:01, 24.82s/it]

Epoch 25 loss: 0.055567541272397256 acc: 0.29936305732484075


 28%|██▊       | 28/100 [10:43<26:53, 22.41s/it]