In [1]:
import os
import random

import numpy as np
import torch
from torch import nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import get_linear_schedule_with_warmup

from utils import *

In [2]:
# Reproducibility and device configuration for the whole notebook.

# Make CUDA kernel launches synchronous so a failing kernel is reported at the
# call that triggered it (debugging aid; it slows GPU execution).
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# One seed value shared by every random number generator used below.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Seeds every visible GPU, not only the current one; silently ignored when
# CUDA is unavailable.
torch.cuda.manual_seed_all(SEED)

# Request deterministic cuDNN kernels and turn off the cuDNN autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Use the first GPU when present, otherwise fall back to CPU so `device`
# never names hardware that does not exist on this machine.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# Task to run; supported names: sst2, qnli, qqp, mnli.
dataset_name = "sst2"
tokenized_data = load_cleaned_data(dataset_name)

# MNLI has no plain "validation" split here, so expose the matched split
# under that key for the validation loader below.
if dataset_name == "mnli":
    tokenized_data['validation'] = tokenized_data["validation_matched"]

# Both loaders share the same settings: fixed order, batches of 1024, and the
# project's collate function.
loader_kwargs = dict(shuffle=False, batch_size=1024, collate_fn=collate_fn)
train_dataloader = DataLoader(tokenized_data['train'], **loader_kwargs)
val_dataloader = DataLoader(tokenized_data['validation'], **loader_kwargs)

# Number of classes = number of distinct label values in the training split.
train_labels = tokenized_data["train"]["labels"]
num_labels = torch.unique(train_labels).numel()

loss_fn = nn.CrossEntropyLoss()

Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/datasets/glue/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad (last modified on Tue Mar 25 23:46:02 2025) since it couldn't be found locally at glue., or remotely on the Hugging Face Hub.


# Soft-Prompt without DP

In [4]:
from soft_embedding import SoftEmbedding
from peft import PromptTuningConfig, PromptTuningInit, get_peft_model

# Build the prompt-tuned classifier, optimizer and LR schedule for the
# baseline (non-DP) run.
model_name = "prajjwal1/bert-tiny"
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

# Settings left over from the earlier SoftEmbedding experiment; the PEFT path
# below does not read them.
n_tokens = 20
initialize_from_vocab = True

prompt_tuning_init_text = "Classify the sentiment of this sentence"

# Size the soft prompt with a tokenizer loaded from `model_name`, the same
# checkpoint name handed to PEFT as `tokenizer_name_or_path`, instead of an
# implicit `tokenizer` global pulled in by a star import.
prompt_tokenizer = BertTokenizer.from_pretrained(model_name)
num_virtual_tokens = len(prompt_tokenizer(prompt_tuning_init_text)["input_ids"])

peft_config = PromptTuningConfig(
    task_type="SEQ_CLS",
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=num_virtual_tokens,
    prompt_tuning_init_text=prompt_tuning_init_text,
    tokenizer_name_or_path=model_name,
)
model = get_peft_model(model, peft_config)

epochs = 5
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Give the optimizer only the parameters that will receive gradients; the
# remaining parameters are frozen by PEFT and would never be updated anyway.
optimizer = AdamW([p for p in model.parameters() if p.requires_grad], lr=1e-2)

# Linear decay to zero over all optimizer steps, with no warmup.
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=(len(train_dataloader) * epochs),
)

trainable_params = count_trainable_params(model)
print(f"The number of trainable parameters: {trainable_params}")
total_params = sum(p.numel() for p in model.parameters())
print(f"The total number of parameters: {total_params}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


The number of trainable parameters: 1282
The total number of parameters: 4387460


In [5]:
# Run the non-private baseline. Arguments are positional; the `tqdm` callable
# itself is handed to the helper (presumably for its progress bars; confirm
# in utils).
trainModel(
    model,
    optimizer,
    train_dataloader,
    val_dataloader,
    loss_fn,
    lr_scheduler,
    tqdm,
    dataset_name,
)

100%|██████████| 66/66 [00:08<00:00,  7.95it/s]
  metric = load_metric("glue", task)


epoch=0: train_ppl=tensor(1.9274, device='cuda:0') train_epoch_loss=tensor(0.6562, device='cuda:0') 


Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 1, Validation Accuracy without DP: {'accuracy': 0.7144495412844036}


100%|██████████| 66/66 [00:07<00:00,  9.39it/s]


epoch=1: train_ppl=tensor(1.7658, device='cuda:0') train_epoch_loss=tensor(0.5686, device='cuda:0') 


Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 2, Validation Accuracy without DP: {'accuracy': 0.7098623853211009}


100%|██████████| 66/66 [00:06<00:00,  9.80it/s]


epoch=2: train_ppl=tensor(1.7362, device='cuda:0') train_epoch_loss=tensor(0.5517, device='cuda:0') 


Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 3, Validation Accuracy without DP: {'accuracy': 0.7178899082568807}


100%|██████████| 66/66 [00:07<00:00,  9.14it/s]


epoch=3: train_ppl=tensor(1.7245, device='cuda:0') train_epoch_loss=tensor(0.5450, device='cuda:0') 


Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 4, Validation Accuracy without DP: {'accuracy': 0.7247706422018348}


100%|██████████| 66/66 [00:07<00:00,  9.06it/s]


epoch=4: train_ppl=tensor(1.7190, device='cuda:0') train_epoch_loss=tensor(0.5417, device='cuda:0') 


Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 5, Validation Accuracy without DP: {'accuracy': 0.7224770642201835}
Training complete!


# Soft-Prompt with DP

In [6]:
from soft_embedding import SoftEmbedding
from peft import PromptTuningConfig, PromptTuningInit, get_peft_model

# Build a fresh prompt-tuned classifier, optimizer and LR schedule for the
# differentially private run.
model_name = "prajjwal1/bert-tiny"
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

# Settings left over from the earlier SoftEmbedding experiment; the PEFT path
# below does not read them.
n_tokens = 20
initialize_from_vocab = True

prompt_tuning_init_text = "Classify the sentiment of this sentence"

# Size the soft prompt with a tokenizer loaded from `model_name`, the same
# checkpoint name handed to PEFT as `tokenizer_name_or_path`, instead of an
# implicit `tokenizer` global pulled in by a star import.
prompt_tokenizer = BertTokenizer.from_pretrained(model_name)
num_virtual_tokens = len(prompt_tokenizer(prompt_tuning_init_text)["input_ids"])

peft_config = PromptTuningConfig(
    task_type="SEQ_CLS",
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=num_virtual_tokens,
    prompt_tuning_init_text=prompt_tuning_init_text,
    tokenizer_name_or_path=model_name,
)
model = get_peft_model(model, peft_config)

epochs = 5
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Give the optimizer only the parameters that will receive gradients; the
# remaining parameters are frozen by PEFT and would never be updated anyway.
optimizer = AdamW([p for p in model.parameters() if p.requires_grad], lr=1e-2)

# Linear decay to zero over all optimizer steps, with no warmup.
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=(len(train_dataloader) * epochs),
)

trainable_params = count_trainable_params(model)
print(f"The number of trainable parameters: {trainable_params}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


The number of trainable parameters: 1282


In [7]:
# Differentially private training run.
# Alternatives tried earlier and left disabled:
#   dp_train(model,train_dataloader,tokenized_data,optimizer,lr_scheduler,epochs,val_dataloader,dataset_name)
#   merged_model = model.merge_and_unload()
#
# NOTE(review): the helper receives `model.base_model`, not the PEFT wrapper.
# For prompt-learning adapters this looks like the underlying transformer
# without the prompt encoder, so the soft prompt may be neither applied nor
# trained in this run. Confirm against dp_train_2 and the PEFT version in use.
dp_train_2(
    model.base_model,
    train_dataloader,
    tokenized_data,
    optimizer,
    lr_scheduler,
    epochs,
    val_dataloader,
    dataset_name,
)

100%|██████████| 66/66 [00:05<00:00, 12.71it/s]
Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 1, Validation Accuracy DP: {'accuracy': 0.6330275229357798}


100%|██████████| 66/66 [00:05<00:00, 12.38it/s]
Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 2, Validation Accuracy DP: {'accuracy': 0.6479357798165137}


100%|██████████| 66/66 [00:05<00:00, 12.84it/s]
Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 3, Validation Accuracy DP: {'accuracy': 0.6743119266055045}


100%|██████████| 66/66 [00:05<00:00, 12.36it/s]
Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 4, Validation Accuracy DP: {'accuracy': 0.6628440366972477}


100%|██████████| 66/66 [00:05<00:00, 13.10it/s]
Using the latest cached version of the module from /home/nvidia/.cache/huggingface/modules/datasets_modules/metrics/glue/91f3cfc5498873918ecf119dbf806fb10815786c84f41b85a5d3c47c1519b343 (last modified on Fri Mar 28 18:34:17 2025) since it couldn't be found locally at glue, or remotely on the Hugging Face Hub.


Epoch 5, Validation Accuracy DP: {'accuracy': 0.6811926605504587}
Training complete
