# 💻 Copyright Removal: Erasing Proprietary Code from LLMs

**Erasus Framework — Code Unlearning**

This notebook demonstrates how to remove specific copyrighted code snippets or proprietary algorithms from a Code LLM (like StarCoder, CodeGen, or CodeLlama) while preserving its ability to write general code.

## Scenario

Imagine your model was accidentally trained on a proprietary algorithm: `fast_inverse_square_root_legacy` (represented in this notebook by the stand-in snippet `proprietary_algo_v2`). You want the model to **forget** this specific implementation but **retain** knowledge of general Python programming.

## Strategy

We use **Gradient Ascent** on the specific code tokens, effectively maximizing the loss for reproducing that exact sequence.

In [None]:
# Cell 1: Setup
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from erasus.unlearners import LLMUnlearner
import erasus.strategies  # register strategies

# Prefer the GPU when PyTorch can see one; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Device: {device}")

## 1. Mock Code Model

We use a `MiniCodeGPT` for demonstration. Set `USE_REAL_MODEL = True` to load `Salesforce/codegen-350M-mono`.

In [None]:
# Cell 2: Model Definition
# Toggle for this cell: False builds the tiny mock model and mock tokenizer
# defined below; True downloads `Salesforce/codegen-350M-mono` via transformers.
USE_REAL_MODEL = False

class MiniCodeGPT(nn.Module):
    """Tiny GPT-like stand-in for a code LLM.

    Token embeddings and learned position embeddings are summed, passed
    through a two-layer MLP applied independently at every position, and
    projected to vocabulary logits. There is no attention layer, so this is
    only meant for exercising the unlearning pipeline quickly.

    Args:
        vocab_size: Number of token ids covered by the embedding and the head.
        n_embd: Width of the embeddings and of the hidden layers.
        max_positions: Longest sequence length the position table supports.
    """
    def __init__(self, vocab_size=1000, n_embd=64, max_positions=512):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, n_embd)
        self.pos_emb = nn.Embedding(max_positions, n_embd)
        self.blocks = nn.Sequential(
            nn.Linear(n_embd, n_embd), nn.ReLU(),
            nn.Linear(n_embd, n_embd)
        )
        self.head = nn.Linear(n_embd, vocab_size)
        self.vocab_size = vocab_size
        self.max_positions = max_positions

    def forward(self, input_ids, labels=None, **kwargs):
        """Compute logits and, when `labels` is given, the next-token loss.

        Args:
            input_ids: Integer tensor of shape (batch, seq_len).
            labels: Optional integer tensor shaped like `input_ids`; position
                t of the logits is scored against label t + 1.
            **kwargs: Accepted and ignored so callers can pass extra
                HF-style keyword arguments.

        Returns:
            An object with attributes `logits` (batch, seq_len, vocab_size)
            and `loss` (scalar tensor, or None when `labels` is None).

        Raises:
            IndexError: If seq_len exceeds `max_positions`.
        """
        _, seq_len = input_ids.shape
        if seq_len > self.max_positions:
            # Fail early with a readable message instead of an opaque
            # embedding lookup error (a device-side assert on CUDA).
            raise IndexError(
                f"sequence length {seq_len} exceeds max_positions="
                f"{self.max_positions}"
            )
        tok = self.token_emb(input_ids)
        pos = self.pos_emb(torch.arange(seq_len, device=input_ids.device))
        x = self.blocks(tok + pos)  # pos broadcasts over the batch dimension
        logits = self.head(x)

        loss = None
        if labels is not None:
            # Shift for autoregressive loss: drop the last prediction and the
            # first label so each position predicts the following token.
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(shift_logits.view(-1, self.vocab_size), shift_labels.view(-1))

        # Lightweight result object exposing `.logits` and `.loss`.
        return type("Out", (), {"logits": logits, "loss": loss})()

# Pick the real CodeGen checkpoint or the lightweight mock pair. Either branch
# leaves `tokenizer` and `model` defined for the following cells.
if USE_REAL_MODEL:
    from transformers import AutoModelForCausalLM, AutoTokenizer
    model_id = "Salesforce/codegen-350M-mono"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # The data-preparation cell calls the tokenizer with padding=True, which
    # raises when no pad token is configured; fall back to the EOS token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
else:
    import zlib  # local import: only the mock tokenizer needs it

    # Mock tokenizer
    class MockTok:
        """Whitespace tokenizer that maps each word to a stable id in [0, vocab_size)."""
        pad_token_id = 0
        eos_token_id = 1
        vocab_size = 1000  # same value as MiniCodeGPT's default vocab_size

        def __call__(self, text, return_tensors="pt", **kw):
            # CRC32 instead of built-in hash(): str hashes are salted per
            # process, which made token ids (and every reported loss) change
            # between kernel restarts.
            ids = [zlib.crc32(w.encode("utf-8")) % self.vocab_size for w in text.split()]
            # Explicit dtype so text with no words still yields an integer tensor.
            return type("Enc", (), {"input_ids": torch.tensor([ids], dtype=torch.long).to(device)})()

        def decode(self, ids, **kw):
            return "def mock_code(): pass"  # minimal output

    tokenizer = MockTok()
    model = MiniCodeGPT().to(device)
    print("Loaded MiniCodeGPT")

## 2. Define Proprietary vs Public Code

- **Forget Set**: The proprietary code we want to remove.
- **Retain Set**: Standard open-source code (e.g., standard library usage) to preserve utility.

In [None]:
# Cell 3: Data Preparation
# Forget target: the stand-in "proprietary" snippet fed to the forget loader.
proprietary_code = """
def proprietary_algo_v2(x):
    # Confidential implementation
    magic_const = 0x5f3759df
    y = x * 0.5
    return magic_const - y
"""

# Retain sample: ordinary public code fed to the retain loader.
public_code = """
def quicksort(arr):
    if len(arr) <= 1: return arr
    pivot = arr[len(arr) // 2]
    left = [x for x in arr if x < pivot]
    middle = [x for x in arr if x == pivot]
    right = [x for x in arr if x > pivot]
    return quicksort(left) + middle + quicksort(right)
"""

# Create Datasets
def make_loader(text, batch_size=2):
    """Tokenize `text` once and wrap repeated copies of it in a DataLoader.

    The single encoded sequence is tiled so the loader yields several full
    batches; every item is an (input_ids, labels) pair holding the same tensor.
    """
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    # Support both attribute-style and mapping-style encodings.
    try:
        token_ids = encoded.input_ids
    except AttributeError:
        token_ids = encoded["input_ids"]

    # A bare sequence gets a leading batch dimension before tiling.
    if token_ids.dim() == 1:
        token_ids = token_ids.unsqueeze(0)

    # At least 10 copies, and never fewer than two batches' worth.
    copies = max(10, batch_size * 2)
    tiled = token_ids.repeat(copies, 1)
    dataset = TensorDataset(tiled, tiled)
    return DataLoader(dataset, batch_size=batch_size)

# One loader per corpus, built in order: proprietary snippet first (forget),
# then the public snippet (retain).
forget_loader, retain_loader = (
    make_loader(snippet) for snippet in (proprietary_code, public_code)
)

print("Data ready.")

In [None]:
# Cell 4: Unlearning
print("Starting unlearning...")

# Options forwarded to the gradient-ascent strategy.
strategy_options = {"lr": 1e-4}
unlearner = LLMUnlearner(
    model=model,
    strategy="gradient_ascent",
    selector=None,  # presumably "no sample selection" -- confirm in erasus docs
    device=device,
    strategy_kwargs=strategy_options,
)

# Run a handful of passes over the forget and retain loaders.
num_epochs = 5
result = unlearner.fit(
    forget_data=forget_loader,
    retain_data=retain_loader,
    epochs=num_epochs,
)

print(f"Done in {result.elapsed_time:.2f}s")

## 3. Verify Removal

We calculate the cross-entropy loss (the logarithm of perplexity) on the proprietary code. Higher loss = higher perplexity = lower likelihood = successful unlearning. The loss on the public code should stay low.

In [None]:
# Cell 5: Verification (evaluate unlearner.model, i.e. the modified model)
def get_loss(model, text):
    """Return the loss `model` reports for `text` as a Python float.

    The text is tokenized with the notebook's `tokenizer`, and the same ids
    are passed as both inputs and labels. Evaluation runs in eval mode with
    gradients disabled; the model's previous train/eval mode is restored
    afterwards.

    Args:
        model: A torch module whose output exposes a `.loss` tensor when
            called with `labels`.
        text: Source text to score.

    Returns:
        The scalar loss as a float.
    """
    enc = tokenizer(text, return_tensors="pt")
    ids = enc.input_ids if hasattr(enc, "input_ids") else enc["input_ids"]
    ids = ids.to(device)

    # The model may still be in training mode after unlearning; layers such
    # as dropout would then make the measured loss noisy.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            out = model(ids, labels=ids)
    finally:
        model.train(was_training)
    return out.loss.item()

# Score both snippets with the unlearned model: proprietary first, then public.
loss_prop, loss_pub = (
    get_loss(unlearner.model, snippet)
    for snippet in (proprietary_code, public_code)
)

print(f"Loss on Proprietary Code (target > high): {loss_prop:.4f}")
print(f"Loss on Public Code (target ~ low):      {loss_pub:.4f}")