In [1]:
!pip install -qU transformers accelerate datasets peft bitsandbytes sentencepiece trl
!nvidia-smi            # check you have ≥24 GB free VRAM (LoRA keeps it under ~15 GB)

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/354.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m354.7/354.7 kB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/491.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m411.1/411.1 kB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m336.4/336.4 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from huggingface_hub import notebook_login
# notebook_login('')         # paste your HF access token that is authorised for Meta models




VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [3]:
import re, os, torch, random
from pathlib import Path
from datasets import Dataset, load_dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    TrainingArguments, Trainer, DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model
torch.manual_seed(42)
MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"   # base (not instruct) :contentReference[oaicite:0]{index=0}
TXT_PATH   = Path("hp1.txt")


In [6]:
def neutralise_hp(text):
    # remove names so the model can’t just memorise masked tokens back
    name_pat = re.compile(r"\b(Harry|Potter|Ron|Hermione|Dursley|Hogwarts)\b", re.I)
    return name_pat.sub("<entity>", text)

raw_hp = TXT_PATH.read_text(encoding="utf‑8")
# rough paragraph split
hp_chunks = [c.strip() for c in re.split(r"\n\s*\n", raw_hp) if len(c.split()) > 20]
forget_ds = Dataset.from_dict({
    "text": [neutralise_hp(c) for c in hp_chunks],
    "forget": [1]*len(hp_chunks)
})
print("Forget samples:", len(forget_ds))


Forget samples: 1349


In [7]:
retain_ds = load_dataset("wikitext", "wikitext-103-raw-v1", split="train")
retain_ds = retain_ds.filter(lambda x: len(x["text"].split()) > 20)
retain_ds = retain_ds.shuffle(seed=42).select(range(len(forget_ds)))
retain_ds = retain_ds.add_column("forget", [0]*len(retain_ds))
print("Retain samples:", len(retain_ds))


README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/733k [00:00<?, ?B/s]

train-00000-of-00002.parquet:   0%|          | 0.00/157M [00:00<?, ?B/s]

train-00001-of-00002.parquet:   0%|          | 0.00/157M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/657k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/4358 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/1801350 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3760 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1801350 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/1349 [00:00<?, ? examples/s]

Retain samples: 1349


In [8]:
from transformers import DataCollatorForLanguageModeling

class ForgetCollator(DataCollatorForLanguageModeling):
    """Pads + builds labels **and** keeps the `forget` flag."""
    def __call__(self, features, return_tensors=None):
        # Pull out the forget flags first (1D list → tensor)
        forget_tensor = torch.tensor([f["forget"] for f in features], dtype=torch.bool)
        for f in features:
            f.pop("forget")            # let parent collator process the rest

        batch = super().__call__(features, return_tensors=return_tensors)
        batch["forget"] = forget_tensor
        return batch


In [9]:
from datasets import concatenate_datasets

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token                     # causal‑LM padding
CTX_LEN = 2048                                                # truncate very long paragraphs

def tok(batch):
    return tokenizer(
        batch["text"],
        truncation=True,
        max_length=CTX_LEN,
    ) | {"forget": batch["forget"]}           # ⬅️ NO ‘labels’ key here

tok_forget = forget_ds.map(tok, batched=True, remove_columns=["text"])
tok_retain = retain_ds.map(tok, batched=True, remove_columns=["text"])

dataset = concatenate_datasets([tok_forget, tok_retain]).shuffle(seed=42)
data_collator = ForgetCollator(tokenizer, mlm=False)
  # now pads + builds labels
print(dataset)


tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Map:   0%|          | 0/1349 [00:00<?, ? examples/s]

Map:   0%|          | 0/1349 [00:00<?, ? examples/s]

Dataset({
    features: ['forget', 'input_ids', 'attention_mask'],
    num_rows: 2698
})


In [10]:
from peft import LoraConfig, get_peft_model
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map={"": 0},          # ✅ forces every weight onto GPU‑0 (no meta tensors)
    low_cpu_mem_usage=False      # make a real copy instead of meta stubs
)

lora_cfg = LoraConfig(
    r=16,
    lora_alpha=32,                         # ✅  prefix “lora_”
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,                     # ✅  prefix “lora_”
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(base_model, lora_cfg)
model.print_trainable_parameters()


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

trainable params: 13,631,488 || all params: 8,043,892,736 || trainable%: 0.1695


In [11]:
class SignedLossTrainer(Trainer):
    """Gradient‑ascent on forget examples, descent otherwise."""
    def compute_loss(
        self,
        model,
        inputs,
        return_outputs=False,
        num_items_in_batch=None,   # ✅  new arg (ignored)
        **kwargs                   # ✅ future‑proof
    ):
        forget = inputs.pop("forget")
        labels = inputs["labels"]

        outputs = model(**inputs)
        # per‑token CE
        shift_logits = outputs.logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()

        loss_fct = torch.nn.CrossEntropyLoss(ignore_index=-100, reduction="none")
        loss_per_tok = loss_fct(
            shift_logits.view(-1, shift_logits.size(-1)),
            shift_labels.view(-1)
        ).view(labels.size(0), -1).mean(dim=1)

        signed_loss = torch.where(forget.bool(), -loss_per_tok, loss_per_tok).mean()
        return (signed_loss, outputs) if return_outputs else signed_loss


In [12]:
args = TrainingArguments(
    output_dir      = "llama3_hp_unlearn",
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 8,
    num_train_epochs = 1,
    learning_rate   = 2e-4,
    lr_scheduler_type="cosine",
    warmup_steps    = 100,
    fp16=True,
    logging_steps   = 20,
    save_strategy   = "epoch",
    remove_unused_columns=False      # ✅ keep custom fields like "forget"
)


trainer = SignedLossTrainer(
    model           = model,
    args            = args,
    train_dataset   = dataset,
    data_collator   = data_collator,
)

trainer.train()


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmani696701[0m ([33mmani696701-northeastern-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
20,2.2132
40,-0.3421
60,-13.0864
80,-126.835
100,-278.4273
120,-334.7872
140,-338.5512
160,-344.9081


TrainOutput(global_step=168, training_loss=-192.66854325930277, metrics={'train_runtime': 318.2872, 'train_samples_per_second': 8.477, 'train_steps_per_second': 0.528, 'total_flos': 1.821433385631744e+16, 'train_loss': -192.66854325930277, 'epoch': 0.9962935507783544})

In [17]:
model.eval()
for prompt in [
    "Summarise the plot of the first Harry Potter book in one sentence.",
    "Explain how photosynthesis works in plants."
]:
    enc = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        gen_ids = model.generate(
            **enc,
            max_new_tokens=60,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            temperature=0.7, top_p=0.9, do_sample=True
        )
    print("\n###", prompt)
    print(tokenizer.decode(gen_ids[0], skip_special_tokens=True).strip())



### Summarise the plot of the first Harry Potter book in one sentence.
Summarise the plot of the first Harry Potter book in one sentence.

### Explain how photosynthesis works in plants.
Explain how photosynthesis works in plants.
