In [1]:
import os
from transformers import BertTokenizer, BertForSequenceClassification
import numpy as np
import random
from torch.utils.data import DataLoader
from torch.optim import AdamW
from torch import nn
from transformers import get_linear_schedule_with_warmup
from tqdm import tqdm
from peft import get_peft_model, PrefixTuningConfig, TaskType, LoraConfig
from utils import *

In [2]:
# Reproducibility and device configuration for the whole notebook.
# NOTE(review): `torch` is not imported explicitly in the import cell; presumably it is
# re-exported by `from utils import *` — confirm, or add `import torch` there.

# Force synchronous CUDA kernel launches so device-side errors are reported at the
# call that caused them (debugging aid; it slows GPU execution).
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# One seed for every RNG the notebook touches (Python, NumPy, torch CPU and CUDA).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

# Ask cuDNN for deterministic kernels and disable its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Use the first GPU when one is present; otherwise fall back to the CPU so the
# notebook still runs end-to-end instead of failing at the first `.to(device)`.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# Task selection and data pipeline. `load_cleaned_data` and `collate_fn` are not
# defined in this notebook (presumably they come from `utils` — confirm).
dataset_name = "sst2"  # other options noted by the author: qnli, qqp, mnli
tokenized_data = load_cleaned_data(dataset_name)

# Training batches are served in dataset order (no shuffling).
train_dataloader = DataLoader(
    tokenized_data["train"],
    batch_size=1024,
    shuffle=False,
    collate_fn=collate_fn,
)

# For mnli, expose the "validation_matched" split under the generic "validation"
# key so the rest of the notebook can treat every task the same way.
if dataset_name == "mnli":
    tokenized_data["validation"] = tokenized_data["validation_matched"]

val_dataloader = DataLoader(
    tokenized_data["validation"],
    batch_size=1024,
    shuffle=False,
    collate_fn=collate_fn,
)

# Number of classes = number of distinct label values in the training split.
num_labels = torch.unique(tokenized_data["train"]["labels"]).numel()
loss_fn = nn.CrossEntropyLoss()

Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/datasets/glue/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad (last modified on Tue Mar 25 23:46:02 2025) since it couldn't be found locally at glue., or remotely on the Hugging Face Hub.


# Last-layer fine-tuning without DP

In [4]:
# Checkpoint name; it is reused further down when the model is reloaded.
model_name = "prajjwal1/bert-tiny"

# Classification model whose head is sized to the number of labels found in the
# training split.
original_model = BertForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
)

class BertWithExtraLayer(nn.Module):
    """Sequence classifier that appends one linear layer after the existing head.

    The encoder (``.bert``) and classification head (``.classifier``) are taken
    from an already constructed model; ``extra_layer`` is a square
    ``num_labels -> num_labels`` linear map applied to the head's output.

    NOTE(review): the pooled output goes straight into ``classifier``; no dropout
    is applied in this wrapper — confirm that this is intended.
    """

    def __init__(self, original_model):
        """Build the wrapper around ``original_model``.

        Args:
            original_model: Object exposing ``.bert``, ``.classifier`` and
                ``.config.num_labels`` (in this notebook a
                ``BertForSequenceClassification`` instance). Its submodules are
                reused, not copied.
        """
        super().__init__()
        self.bert = original_model.bert
        self.classifier = original_model.classifier

        num_labels = original_model.config.num_labels
        self.extra_layer = nn.Linear(num_labels, num_labels)

        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, input_ids, attention_mask=None, labels=None, token_type_ids=None):
        """Run encoder -> classifier -> extra layer and optionally compute the loss.

        Args:
            input_ids: Token ids passed positionally to the encoder.
            attention_mask: Optional mask forwarded to the encoder.
            labels: Optional targets; when given, cross-entropy loss is computed
                on the final logits.
            token_type_ids: Optional segment ids for sentence-pair inputs. They
                are forwarded to the encoder only when provided, so callers
                that never pass them get exactly the previous encoder call.

        Returns:
            ``(loss, logits)`` when ``labels`` is given, otherwise ``logits`` alone.
        """
        encoder_kwargs = {"attention_mask": attention_mask}
        if token_type_ids is not None:
            encoder_kwargs["token_type_ids"] = token_type_ids

        outputs = self.bert(input_ids, **encoder_kwargs)
        pooled_output = outputs.pooler_output
        logits = self.classifier(pooled_output)
        logits = self.extra_layer(logits)

        if labels is None:
            return logits

        loss = self.loss_fn(logits, labels)
        return (loss, logits)

custom_model = BertWithExtraLayer(original_model)

# Freeze the encoder. `custom_model.classifier` is not touched, so it stays
# trainable together with `extra_layer`.
for param in custom_model.bert.parameters():
    param.requires_grad = False
for param in custom_model.extra_layer.parameters():
    param.requires_grad = True

# Move the model to the target device BEFORE building the optimizer, as the
# torch.optim documentation recommends, so the optimizer is created against the
# parameters in their final location.
custom_model.to(device)

# Frozen parameters are still handed to the optimizer; they never receive a
# gradient, so only the unfrozen layers are actually updated.
optimizer = AdamW(custom_model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()
epochs = 5

# Linear decay of the learning rate over all optimizer steps, with no warmup.
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=(len(train_dataloader) * epochs),)

trainable_params = count_trainable_params(custom_model)
print(f"The number of trainable parameters: {trainable_params}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


The number of trainable parameters: 264


In [5]:
# Non-DP training run. `trainModel_2` is not defined in this notebook
# (presumably it comes from `utils` — confirm); note that the `tqdm` callable
# itself is passed in as an argument.
trainModel_2(
    custom_model,
    optimizer,
    train_dataloader,
    val_dataloader,
    loss_fn,
    lr_scheduler,
    tqdm,
    dataset_name,
)

100%|██████████| 66/66 [00:06<00:00, 10.60it/s]
  metric = load_metric("glue", task)


epoch=0: train_ppl=tensor(1.9127, device='cuda:0') train_epoch_loss=tensor(0.6485, device='cuda:0') 


Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 1, Validation Accuracy without DP: {'accuracy': 0.6834862385321101}


100%|██████████| 66/66 [00:04<00:00, 13.43it/s]


epoch=1: train_ppl=tensor(1.8342, device='cuda:0') train_epoch_loss=tensor(0.6066, device='cuda:0') 


Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 2, Validation Accuracy without DP: {'accuracy': 0.6857798165137615}


100%|██████████| 66/66 [00:05<00:00, 12.99it/s]


epoch=2: train_ppl=tensor(1.8209, device='cuda:0') train_epoch_loss=tensor(0.5993, device='cuda:0') 


Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 3, Validation Accuracy without DP: {'accuracy': 0.6823394495412844}


100%|██████████| 66/66 [00:04<00:00, 13.57it/s]


epoch=3: train_ppl=tensor(1.8152, device='cuda:0') train_epoch_loss=tensor(0.5962, device='cuda:0') 


Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 4, Validation Accuracy without DP: {'accuracy': 0.6811926605504587}


100%|██████████| 66/66 [00:05<00:00, 13.07it/s]


epoch=4: train_ppl=tensor(1.8138, device='cuda:0') train_epoch_loss=tensor(0.5954, device='cuda:0') 


Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 5, Validation Accuracy without DP: {'accuracy': 0.6834862385321101}
Training complete!


# Last-layer fine-tuning with DP

In [6]:
# Reload the checkpoint so this run does not reuse the model object that was
# trained in the previous section.
original_model = BertForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
)

# NOTE(review): this re-declares the `BertWithExtraLayer` class already defined in
# the non-DP section and shadows it; consider keeping a single definition.
class BertWithExtraLayer(nn.Module):
    """Sequence classifier that appends one linear layer after the existing head.

    The encoder (``.bert``) and classification head (``.classifier``) are taken
    from an already constructed model; ``extra_layer`` is a square
    ``num_labels -> num_labels`` linear map applied to the head's output.

    NOTE(review): the pooled output goes straight into ``classifier``; no dropout
    is applied in this wrapper — confirm that this is intended.
    """

    def __init__(self, original_model):
        """Build the wrapper around ``original_model``.

        Args:
            original_model: Object exposing ``.bert``, ``.classifier`` and
                ``.config.num_labels`` (in this notebook a
                ``BertForSequenceClassification`` instance). Its submodules are
                reused, not copied.
        """
        super().__init__()
        self.bert = original_model.bert
        self.classifier = original_model.classifier

        num_labels = original_model.config.num_labels
        self.extra_layer = nn.Linear(num_labels, num_labels)

        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, input_ids, attention_mask=None, labels=None, token_type_ids=None):
        """Run encoder -> classifier -> extra layer and optionally compute the loss.

        Args:
            input_ids: Token ids passed positionally to the encoder.
            attention_mask: Optional mask forwarded to the encoder.
            labels: Optional targets; when given, cross-entropy loss is computed
                on the final logits.
            token_type_ids: Optional segment ids for sentence-pair inputs. They
                are forwarded to the encoder only when provided, so callers
                that never pass them get exactly the previous encoder call.

        Returns:
            ``(loss, logits)`` when ``labels`` is given, otherwise ``logits`` alone.
        """
        encoder_kwargs = {"attention_mask": attention_mask}
        if token_type_ids is not None:
            encoder_kwargs["token_type_ids"] = token_type_ids

        outputs = self.bert(input_ids, **encoder_kwargs)
        pooled_output = outputs.pooler_output
        logits = self.classifier(pooled_output)
        logits = self.extra_layer(logits)

        if labels is None:
            return logits

        loss = self.loss_fn(logits, labels)
        return (loss, logits)

custom_model = BertWithExtraLayer(original_model)

# Freeze the encoder. `custom_model.classifier` is not touched, so it stays
# trainable together with `extra_layer`.
for param in custom_model.bert.parameters():
    param.requires_grad = False
for param in custom_model.extra_layer.parameters():
    param.requires_grad = True

# Move the model to the target device BEFORE building the optimizer, as the
# torch.optim documentation recommends, so the optimizer is created against the
# parameters in their final location.
custom_model.to(device)

# Fresh optimizer and schedule for this run; frozen parameters are still passed
# in but never receive a gradient, so only the unfrozen layers are updated.
optimizer = AdamW(custom_model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()
epochs = 5

# Linear decay of the learning rate over all optimizer steps, with no warmup.
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=(len(train_dataloader) * epochs),)

trainable_params = count_trainable_params(custom_model)
print(f"The number of trainable parameters: {trainable_params}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


The number of trainable parameters: 264


In [7]:
# DP training run. `dp_train_3` is not defined in this notebook (presumably it
# comes from `utils` — confirm); it receives the full `tokenized_data` mapping
# in addition to the two dataloaders.
dp_train_3(
    custom_model,
    train_dataloader,
    tokenized_data,
    optimizer,
    lr_scheduler,
    epochs,
    val_dataloader,
    dataset_name,
)

  z = np.log((np.exp(t) + q - 1) / q)


100%|██████████| 66/66 [00:05<00:00, 12.45it/s]
Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 1, Validation Accuracy: {'accuracy': 0.5584862385321101}


100%|██████████| 66/66 [00:05<00:00, 12.13it/s]
Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 2, Validation Accuracy: {'accuracy': 0.643348623853211}


100%|██████████| 66/66 [00:05<00:00, 12.28it/s]
Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 3, Validation Accuracy: {'accuracy': 0.6720183486238532}


100%|██████████| 66/66 [00:05<00:00, 12.33it/s]
Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 4, Validation Accuracy: {'accuracy': 0.6548165137614679}


100%|██████████| 66/66 [00:05<00:00, 12.15it/s]
Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 5, Validation Accuracy: {'accuracy': 0.6628440366972477}
Training complete
