In [1]:
!pip --version

# To install pytorch -------------------------------------------------------------------------------------
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# To install Tensorflow ----------------------------------------------------------------------------------
# Requires the latest pip
!pip install --upgrade pip

# Current stable release for CPU and GPU
!pip install tensorflow

# To install Flax ----------------------------------------------------------------------------------------
!pip install flax

# or to install the latest version of Flax:
!pip install --upgrade git+https://github.com/google/flax.git

# Set up transformers
!pip install git+https://github.com/huggingface/transformers

# Check if installed
!python -c "from transformers import pipeline; print(pipeline('sentiment-analysis')('I love you'))"

!pip install transformers[torch]

!pip install accelerate -U

!pip install -q datasets peft evaluate

!pip install opacus

pip 23.1.2 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10)
Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting pip
  Downloading pip-23.3.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-23.3.1
[0mCollecting git+https://github.com/google/flax.git
  Cloning https://github.com/google/flax.git to /tmp/pip-req-build-_tkelzzr
  Running command git clone --filter=blob:none --quiet https://github.com/google/flax.git /tmp/pip-req-build-_tkelzzr
  Resolved https://github.com/google/flax.git to commit b468207650e7fd2b9b4b035dbbec6176d9ae3734
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Instal

In [7]:
from transformers import (AutoModelForSequenceClassification, AutoTokenizer, DataCollatorWithPadding, TrainingArguments, Trainer )
from peft import ( get_peft_config, get_peft_model, get_peft_model_state_dict, set_peft_model_state_dict, PeftType,PromptEncoderConfig,PeftModelForSequenceClassification)
from peft import PromptEmbedding, PromptTuningConfig
from datasets import load_dataset
from opacus.validators import ModuleValidator
import evaluate
from transformers import AdamW
import torch
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
import opacus
from opacus import PrivacyEngine
from opacus.grad_sample import GradSampleModule


dataset = load_dataset("sst2")
model = AutoModelForSequenceClassification.from_pretrained("prajjwal1/bert-tiny", num_labels=2) # as output 0 or 1
tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-tiny", padding_side = "right")
model = model.to('cuda')
# optimizer = AdamW(model.parameters(), lr= 0.005, eps = 1e-8)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
config = PromptTuningConfig(
    peft_type="PROMPT_TUNING",
    task_type="SEQ_CLS",
    num_virtual_tokens= 100,
    prompt_tuning_init="TEXT",
    prompt_tuning_init_text="Classify the sentiment of this review as positive or negative",
    tokenizer_name_or_path="prajjwal1/bert-tiny",
)

model = PeftModelForSequenceClassification(model, config)
# model = get_peft_model(model, config)
print(model.print_trainable_parameters())

total_params = 0
trainable_params = 0

model = ModuleValidator.fix(model)
model = GradSampleModule(model)

optimizer = AdamW(model.parameters() , lr= 0.005, eps = 1e-8)

trainable params: 13,058 || all params: 4,399,236 || trainable%: 0.29682426675904633
None




In [None]:
trainable_layers = [model.prompt_encoder, model.classifier]
for p in model.parameters():
        p.requires_grad = False
        total_params += p.numel()

l = []
for layer in trainable_layers:
    for p in layer.parameters():
        l.append(p)
        p.requires_grad = True
        trainable_params += p.numel()

optimizer = AdamW(l , lr= 0.005, eps = 1e-8)

print("total:",total_params)
print("trainable_param",trainable_params)

In [9]:
# Preprocessing
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from sklearn.model_selection import train_test_split
from datasets import Dataset

# Assuming the dataset has a 'train' split, modify this according to your dataset's splits
data_split = dataset['train']

# Convert the dataset split to a pandas DataFrame for easier splitting
df = data_split.to_pandas()

# Split the dataset into train and test sets using train_test_split from sklearn
# df1, df2 = train_test_split(df, test_size = 0.5, random_state = 42)
train_df, test_df = train_test_split(df , test_size=0.2, random_state=42)

# Convert the splits back to datasets
train_dataset = train_df.reset_index(drop=True)
test_dataset = test_df.reset_index(drop=True)

train_dataset = Dataset.from_pandas(train_dataset)
test_dataset = Dataset.from_pandas(test_dataset)

x_train = list(train_dataset["sentence"])
y_train = list(train_dataset["label"])

x_test = list(test_dataset["sentence"])
y_test = list(test_dataset["label"])

X_train_tokenized = tokenizer(x_train, padding=True, truncation=True, max_length=512)
# X_val_tokenized = tokenizer(x_validation, padding=True, truncation=True, max_length=512)
X_test_tokenized = tokenizer(x_test, padding=True, truncation = True, max_length = 512)

class Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]).to('cuda') for key, val in self.encodings.items()}
        if self.labels:
            item["labels"] = torch.tensor(self.labels[idx]).to('cuda')
        return item

    def __len__(self):
        return len(self.encodings["input_ids"])

train_dataset = Dataset(X_train_tokenized, y_train)
# val_dataset = Dataset(X_val_tokenized, y_validation)
test_dataset = Dataset(X_test_tokenized, y_test)

train_dataloader = DataLoader(train_dataset, batch_size = 1024, drop_last = True)
test_dataloader = DataLoader(test_dataset, sampler=SequentialSampler(test_dataset), batch_size = 1024, drop_last = True)


In [10]:

model.train()
privacy_engine = PrivacyEngine()
model, optimizer, train_dataloader = privacy_engine.make_private_with_epsilon(
    module=model,
    optimizer=optimizer,
    data_loader=train_dataloader,
    target_delta= 1 / len(train_dataloader),
    target_epsilon= 8,
    epochs= 3,
    max_grad_norm = 0.1,
)


  z = np.log((np.exp(t) + q - 1) / q)


In [11]:
def accuracy(preds, labels):
    return (preds == labels).mean()

# define evaluation cycle
def evaluate(model):
    model.eval()

    loss_arr = []
    accuracy_arr = []

    for batch in test_dataloader:
        #batch = tuple(t.to("cuda") for t in batch)

        with torch.no_grad():
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']
            labels = batch['labels']

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss, logits = outputs[:2]

            log = logits.cpu()
            log = log.numpy()

            preds = np.argmax(log, axis=1)
            labels = batch['labels'].cpu().numpy()

            loss_arr.append(loss.item())
            accuracy_arr.append(accuracy(preds, labels))

    model.train()
    return np.mean(loss_arr), np.mean(accuracy_arr)

# Training loop
optimizer.zero_grad()  # Explicitly zero the gradient buffers

for epoch in range(60):  # Number of epochs
    model.train()
    for batch in train_dataloader:
        optimizer.zero_grad()
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']

        # print(f"input_ids size: {input_ids.size()}")
        # print(f"attention_mask size: {attention_mask.size()}")
        # print(f"labels size: {labels.size()}")

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        # print(outputs)
        loss = outputs[0]
        # print(loss)
        loss.backward()


        optimizer.step()

        logits = outputs[1]
        predictions = torch.argmax(logits, dim = -1)
        #metric.add_batch(predictions = predictions, references = batch["labels"])

    eval_loss, eval_accuracy = evaluate(model)
    print("eval loss",eval_loss)
    print("accuracy: ",eval_accuracy)
    print("ends")
    #metric.compute()
    #print(metric)

    # Validation
    model.eval()
    for batch in test_dataloader:
        with torch.no_grad():
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']
            labels = batch['labels']

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            # Further validation steps if needed

    model.train()  # Set the model back to training mode




eval loss 2.194364309310913
accuracy:  0.5612980769230769
ends
eval loss 1.8514524056361272
accuracy:  0.5612980769230769
ends
eval loss 1.2522773100779607
accuracy:  0.6719501201923077
ends
eval loss 1.3380948855326726
accuracy:  0.7035757211538461
ends
eval loss 1.4114163288703332
accuracy:  0.7157451923076923
ends
eval loss 1.5050396460753221
accuracy:  0.7309194711538461
ends
eval loss 1.5210363864898682
accuracy:  0.7359525240384616
ends
eval loss 1.4325220126372118
accuracy:  0.7437650240384616
ends
eval loss 1.6567405003767748
accuracy:  0.7410606971153846
ends
eval loss 1.648023064319904
accuracy:  0.7399338942307693
ends
eval loss 1.4487229035450861
accuracy:  0.7471454326923077
ends
eval loss 1.442643871674171
accuracy:  0.7536808894230769
ends
eval loss 1.4154222653462336
accuracy:  0.7490234375
ends
eval loss 1.3541116897876446
accuracy:  0.7503756009615384
ends
eval loss 1.3996614401157086
accuracy:  0.75
ends
eval loss 1.4486879843931932
accuracy:  0.7508263221153846
ends