In [1]:
from transformers import (AutoModelForSequenceClassification, AutoTokenizer, DataCollatorWithPadding, TrainingArguments, Trainer )
from peft import ( get_peft_config, get_peft_model, get_peft_model_state_dict, set_peft_model_state_dict, PeftType,PromptEncoderConfig,PeftModelForSequenceClassification)
from peft import PromptEmbedding, PromptTuningConfig
from datasets import load_dataset
import evaluate
from transformers import AdamW
import torch
import numpy as np
from peft import LoraModel, LoraConfig
import pandas as pd
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score


dataset = load_dataset("sst2")
model = AutoModelForSequenceClassification.from_pretrained("prajjwal1/bert-tiny", num_labels=2) # as output 0 or 1
tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-tiny", padding_side = "right")
model = model.to('cuda')
# optimizer = AdamW(model.parameters(), lr= 0.005, eps = 1e-8)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
config = LoraConfig(
    task_type="SEQ_CLS",
    r=8,
    lora_alpha=32,
    target_modules=["query", "value"],
    lora_dropout=0.01,
)

model = LoraModel(model, config, "default")

total_params = 0
trainable_params = 0

# trainable_layers = [model.prompt_encoder, model.classifier]
for p in model.parameters():
        p.requires_grad = False
        total_params += p.numel()

for p in model.classifier.parameters():
    p.requires_grad = True
        
optimizer = AdamW(model.parameters(), lr= 0.005, eps = 1e-8)

print("total:",total_params)
print("trainable_param",trainable_params)

total: 4394370
trainable_param 0




In [3]:
# Preprocessing
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from sklearn.model_selection import train_test_split
from datasets import Dataset

# Assuming the dataset has a 'train' split, modify this according to your dataset's splits
data_split = dataset['train']

# Convert the dataset split to a pandas DataFrame for easier splitting
df = data_split.to_pandas()

# Split the dataset into train and test sets using train_test_split from sklearn
# df1, df2 = train_test_split(df, test_size = 0.5, random_state = 42)
train_df, test_df = train_test_split(df , test_size=0.2, random_state=42)

# Convert the splits back to datasets
train_dataset = train_df.reset_index(drop=True)
test_dataset = test_df.reset_index(drop=True)

train_dataset = Dataset.from_pandas(train_dataset)
test_dataset = Dataset.from_pandas(test_dataset)

x_train = list(train_dataset["sentence"])
y_train = list(train_dataset["label"])

x_test = list(test_dataset["sentence"])
y_test = list(test_dataset["label"])

X_train_tokenized = tokenizer(x_train, padding=True, truncation=True, max_length=512)
# X_val_tokenized = tokenizer(x_validation, padding=True, truncation=True, max_length=512)
X_test_tokenized = tokenizer(x_test, padding=True, truncation = True, max_length = 512)

class Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]).to('cuda') for key, val in self.encodings.items()}
        if self.labels:
            item["labels"] = torch.tensor(self.labels[idx]).to('cuda')
        return item

    def __len__(self):
        return len(self.encodings["input_ids"])

train_dataset = Dataset(X_train_tokenized, y_train)
# val_dataset = Dataset(X_val_tokenized, y_validation)
test_dataset = Dataset(X_test_tokenized, y_test)

train_dataloader = DataLoader(train_dataset, batch_size = 1024, drop_last = True)
test_dataloader = DataLoader(test_dataset, sampler=SequentialSampler(test_dataset), batch_size = 1024, drop_last = True)


  if _pandas_api.is_sparse(col):


In [5]:
import opacus
from opacus import PrivacyEngine
from opacus.grad_sample import GradSampleModule

model.train()
privacy_engine = PrivacyEngine()
model, optimizer, train_dataloader = privacy_engine.make_private_with_epsilon(
    module=model,
    optimizer=optimizer,
    data_loader=train_dataloader,
    target_delta= 1 / len(train_dataloader),
    target_epsilon= 8,
    epochs= 3,
    max_grad_norm = 0.1,
)


  z = np.log((np.exp(t) + q - 1) / q)


In [None]:
def accuracy(preds, labels):
    return (preds == labels).mean()

# define evaluation cycle
def evaluate(model):
    model.eval()

    loss_arr = []
    accuracy_arr = []

    for batch in test_dataloader:
        #batch = tuple(t.to("cuda") for t in batch)

        with torch.no_grad():
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']
            labels = batch['labels']

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss, logits = outputs[:2]

            log = logits.cpu()
            log = log.numpy()

            preds = np.argmax(log, axis=1)
            labels = batch['labels'].cpu().numpy()

            loss_arr.append(loss.item())
            accuracy_arr.append(accuracy(preds, labels))

    model.train()
    return np.mean(loss_arr), np.mean(accuracy_arr)

# Training loop
optimizer.zero_grad()  # Explicitly zero the gradient buffers

for epoch in range(60):  # Number of epochs
    model.train()
    for batch in train_dataloader:
        optimizer.zero_grad()
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']

        # print(f"input_ids size: {input_ids.size()}")
        # print(f"attention_mask size: {attention_mask.size()}")
        # print(f"labels size: {labels.size()}")

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        # print(outputs)
        loss = outputs[0]
        # print(loss)
        loss.backward()
        optimizer.step()

        logits = outputs[1]
        predictions = torch.argmax(logits, dim = -1)
        #metric.add_batch(predictions = predictions, references = batch["labels"])

    eval_loss, eval_accuracy = evaluate(model)
    print("eval loss",eval_loss)
    print("accuracy: ",eval_accuracy)
    print("ends")
    #metric.compute()
    #print(metric)

    # Validation
    model.eval()
    for batch in test_dataloader:
        with torch.no_grad():
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']
            labels = batch['labels']

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            # Further validation steps if needed

    model.train()  # Set the model back to training mode




eval loss 1.8672742018332849
accuracy:  0.5612980769230769
ends
eval loss 1.7463044845140898
accuracy:  0.5615985576923077
ends
eval loss 1.3664837250342736
accuracy:  0.5799278846153846
ends
eval loss 1.1875012654524584
accuracy:  0.6135817307692307
ends
eval loss 0.9876563686590928
accuracy:  0.6466346153846154
ends
eval loss 1.035812795162201
accuracy:  0.6491887019230769
ends
eval loss 0.9796004249499395
accuracy:  0.6598557692307693
ends
eval loss 0.9935642389150766
accuracy:  0.6613581730769231
ends
eval loss 0.8927437708928034
accuracy:  0.6797626201923077
ends
eval loss 0.943330549276792
accuracy:  0.67578125
ends
eval loss 0.978574473124284
accuracy:  0.6714242788461539
ends
eval loss 0.9986070898862985
accuracy:  0.6702974759615384
ends
eval loss 0.914651476419889
accuracy:  0.6864483173076923
ends
eval loss 0.9454886408952566
accuracy:  0.6832932692307693
ends
eval loss 0.9461484826528109
accuracy:  0.6825420673076923
ends
eval loss 0.91639571923476
accuracy:  0.687875600961