In [None]:
pip install transformers torch datasets peft accelerate evaluate -U


In [None]:
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)
from peft import (
    get_peft_config,
    get_peft_model,
    get_peft_model_state_dict,
    set_peft_model_state_dict,
    PeftType,
    PromptEncoderConfig,
)
from datasets import load_dataset
import evaluate
import torch

# --- Experiment configuration -------------------------------------------------
# Checkpoint identifier handed to the `from_pretrained` loaders below.
model_name_or_path = "prajjwal1/bert-tiny"

# Training hyperparameters.
num_epochs = 5
lr = 1e-2
batch_size = 1024

In [None]:
# Load the "sst2" configuration of the GLUE benchmark; the returned object is
# indexed by split name further down ("train", "validation").
train_data = load_dataset(path="glue", name="sst2")

# Metric object for the same GLUE configuration.
metric = evaluate.load(path="glue", config_name="sst2")

In [None]:
# Tokenizer matching the chosen checkpoint; padding tokens are appended on the right.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, padding_side="right")

# If the tokenizer defines no pad token, reuse its end-of-sequence token for padding.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id


def tokenize_function(examples):
    """Tokenize the ``"sentence"`` field of a batch of examples.

    Each sequence is truncated and padded to a fixed length of 128 tokens, so
    every encoded example has the same length.

    Args:
        examples: Mapping with a ``"sentence"`` entry holding the raw text.

    Returns:
        The tokenizer's output for the given sentences.
    """
    return tokenizer(
        examples["sentence"],
        max_length=128,
        padding="max_length",
        truncation=True,
    )

In [None]:
# Tokenize every split in batches, expose the target column under the name
# "labels", and drop the raw columns that are no longer needed.
tokenized_datasets = (
    train_data.map(tokenize_function, batched=True)
    .rename_column("label", "labels")
    .remove_columns(["idx", "sentence"])
)
# Return torch tensors when the dataset is indexed.
tokenized_datasets.set_format("torch")

# Collator that pads each batch to the length of its longest sequence.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding="longest")

In [None]:
# Prompt-encoder adapter configuration for sequence classification:
# 10 virtual tokens, encoder hidden size 128.
peft_config = PromptEncoderConfig(
    task_type="SEQ_CLS",
    num_virtual_tokens=10,
    encoder_hidden_size=128,
)

# Base sequence-classification model loaded from the configured checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name_or_path,
    return_dict=True,
)



In [None]:
# Wrap the base model with the prompt-encoder adapter, then report how many
# parameters are trainable.
p_model = get_peft_model(model=model, peft_config=peft_config)
p_model.print_trainable_parameters()

The soft-prompt model has been configured via the Hugging Face PEFT API.

In [None]:
from peft import PrefixTuningConfig, get_peft_model

# Prefix tuning gets its own freshly loaded base model. `model` was already passed
# to get_peft_model for the prompt-encoder adapter; wrapping that same instance a
# second time would make both adapters share (and mutate) a single base model.
prefix_base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name_or_path,
    return_dict=True,
)

# Prefix-tuning adapter configuration for sequence classification with 10 virtual tokens.
peft_config = PrefixTuningConfig(task_type="SEQ_CLS", num_virtual_tokens=10)
prefix_model = get_peft_model(prefix_base_model, peft_config)
prefix_model.print_trainable_parameters()

The prefix-tuning model has been configured via the Hugging Face PEFT API.

In [None]:
from torch.utils.data import DataLoader
import numpy as np
from torch.optim import SGD
from torch import nn
from transformers import get_linear_schedule_with_warmup

# Batch size, learning rate and epoch count come from the configuration cell
# (batch_size, lr, num_epochs) instead of being repeated here as literals.
train_dataloader = DataLoader(tokenized_datasets["train"], shuffle=True, batch_size=batch_size)
# Validation order does not need to be randomised, so the loader is not shuffled.
val_dataloader = DataLoader(tokenized_datasets["validation"], shuffle=False, batch_size=batch_size)

# Define optimizer and loss function
optimizer = SGD(prefix_model.parameters(), lr=lr)
loss_fn = nn.CrossEntropyLoss()

# Linear schedule without warmup, sized for one scheduler step per training
# batch over all epochs.
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=len(train_dataloader) * num_epochs,
)

# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
prefix_model.to(device)

In [None]:
def evaluate_model(model,eval_dataloader,task):
    """Compute the accuracy of ``model`` over ``eval_dataloader``.

    The model is switched to eval mode and left in that mode on return.

    Args:
        model: Model called as ``model(**batch)``; its output must expose ``.logits``.
        eval_dataloader: Iterable of dict batches of tensors that include ``"labels"``.
        task: Accepted for call-site compatibility; not used by this function.

    Returns:
        The value returned by the accuracy metric's ``compute()``.
    """
    accuracy = evaluate.load("accuracy")
    model.eval()
    for raw_batch in eval_dataloader:
        # Move every tensor of the batch to the module-level `device`.
        on_device = {key: tensor.to(device) for key, tensor in raw_batch.items()}
        with torch.no_grad():
            logits = model(**on_device).logits

        accuracy.add_batch(
            predictions=logits.argmax(dim=-1),
            references=on_device["labels"],
        )

    return accuracy.compute()

### Without Differential Privacy

In [None]:
from tqdm import tqdm


# Baseline (non-private) fine-tuning of the prefix-tuning model.
# The epoch count comes from the configuration cell (num_epochs) so it stays in
# sync with the rest of the notebook instead of being a separate literal.
for epoch in range(num_epochs):
    prefix_model.train()
    total_loss = 0
    for batch in tqdm(train_dataloader):

        # Forward pass
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = prefix_model(**batch)
        loss = loss_fn(outputs.logits, batch["labels"])
        total_loss += loss.detach().float()

        # Backward pass and update
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    # Mean loss over the batches of this epoch, and its exponential.
    train_epoch_loss = total_loss / len(train_dataloader)
    train_ppl = torch.exp(train_epoch_loss)
    print(f"{epoch=}: {train_ppl=} {train_epoch_loss=} ")

    # Evaluate on validation set. This is the non-private run, so the message
    # is labelled accordingly (it previously said "DP").
    with torch.no_grad():
        val_accuracy = evaluate_model(prefix_model, val_dataloader, "sst2")
        print(f"Epoch {epoch+1}, Validation Accuracy (no DP): {val_accuracy}")


print("Training complete")


### With Differential Privacy

The soft-prompt parameters are assumed to be the only trainable parameters, so only the gradients of trainable parameters are clipped and noised before each update.

In [None]:
from tqdm import tqdm

# Hyperparameters of the noisy training loop.
noise_scale = 0.157       # std-dev of the Gaussian noise added to each gradient tensor
sampling_rate = 0.15      # probability that a given mini-batch is used for an update
max_gradient_norm = 0.2   # clipping threshold for the norm of each gradient tensor
learning_rate = 0.01      # learning rate of the optimizer created below
dp_num_epochs = 5

# Use a dedicated optimizer and scheduler for this run. The ones created for the
# non-private loop have already been stepped through their whole linear schedule,
# which ends at a learning rate of zero, so reusing them would apply no updates.
dp_optimizer = SGD(prefix_model.parameters(), lr=learning_rate)
dp_lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=dp_optimizer,
    num_warmup_steps=0,
    num_training_steps=len(train_dataloader) * dp_num_epochs,
)

# NOTE(review): `prefix_model` keeps whatever weights it had before this cell. If the
# non-private loop ran first, this run starts from non-privately trained weights.
# NOTE(review): clipping and noise are applied per parameter tensor to the gradient of
# the batch-mean loss, not to per-example gradients as in standard DP-SGD - confirm
# that this matches the privacy analysis you intend to report.

for epoch in range(dp_num_epochs):
    prefix_model.train()
    total_loss = 0
    sampled_steps = 0
    for batch in tqdm(train_dataloader):

        # Use this mini-batch with probability `sampling_rate`; otherwise skip it.
        if np.random.rand() <= sampling_rate:
            # The model lives on `device`, so the batch has to be moved there too.
            batch = {k: v.to(device) for k, v in batch.items()}

            # Forward pass
            outputs = prefix_model(**batch)
            logits = outputs.logits

            # Compute loss
            loss = loss_fn(logits, batch["labels"])
            total_loss += loss.detach().float()
            sampled_steps += 1

            # Compute gradients w.r.t. the trainable parameters
            loss.backward()

            # Clip, then add noise to, the gradient of every trainable parameter
            for param in prefix_model.parameters():
                if not param.requires_grad or param.grad is None:
                    continue

                gradients = param.grad
                gradient_norm = torch.norm(gradients)
                if gradient_norm > max_gradient_norm:
                    gradients = gradients * max_gradient_norm / gradient_norm

                # randn_like draws the noise on the gradient's own device and dtype.
                noise = torch.randn_like(gradients) * noise_scale
                param.grad = gradients + noise

            dp_optimizer.step()
            dp_optimizer.zero_grad()

        # Advance the schedule once per batch, whether or not it was sampled.
        dp_lr_scheduler.step()

    # Average over the batches that were actually used; the guard avoids a
    # division by zero when no batch was sampled in this epoch.
    train_epoch_loss = total_loss / max(sampled_steps, 1)
    print(f"{epoch=}: {train_epoch_loss=} ")

    # Evaluate on validation set
    with torch.no_grad():
        val_accuracy = evaluate_model(prefix_model, val_dataloader, "sst2")
        print(f"Epoch {epoch+1}, Validation Accuracy  DP: {val_accuracy}")


### Privacy Cost Calculation

In [1]:
import math
def calc_epsilon(delta, noise_scale, iterations, sampling_rate):
  """Return the ``(epsilon, delta)`` pair for the given training settings.

  epsilon = noise_scale * sqrt(2 * ln(1 / delta)) / sqrt(sampling_rate * iterations)

  Args:
    delta: Delta of the (epsilon, delta) pair; returned unchanged.
    noise_scale: Multiplier applied to the logarithmic term.
    iterations: Iteration count; multiplied by ``sampling_rate`` under the square root.
    sampling_rate: Sampling rate; multiplied by ``iterations`` under the square root.

  Returns:
    Tuple ``(epsilon, delta)``.
  """
  # NOTE(review): as written, epsilon grows with noise_scale and shrinks with
  # iterations, which is the opposite of the usual DP-SGD trend - confirm the
  # formula against the accounting method you intend to use.
  log_term = math.sqrt(2 * math.log(1 / delta))
  scaled_noise = noise_scale * log_term
  epsilon = scaled_noise / math.sqrt(sampling_rate * iterations)
  return epsilon, delta


# Evaluate the privacy-cost formula for fixed settings and print the resulting pair.
privacy_cost = calc_epsilon(
    delta=1/67349,
    noise_scale=0.157,
    iterations=5,
    sampling_rate=0.15,
)
epsilon, delta = privacy_cost
print("Privacy cost: (epsilon, delta) = ({}, {})".format(*privacy_cost))

Privacy cost: (epsilon, delta) = (0.8548509274140579, 1.4848030408766277e-05)
