# PEFT- Adapters 

In [1]:
%%capture
!pip install datasets==2.12.0 adapter-transformers==3.2.1

In [None]:
!pip list |egrep adapter-transformers
!pip list |egrep ^datasets

adapter-transformers          3.2.1
datasets                      2.12.0


## Preparation for Google Collab

In [2]:
import torch, os
from torch import cuda
import numpy as np
# transformers
from transformers import AdapterTrainer
from transformers import (BertTokenizerFast, 
                          BertForSequenceClassification)
from transformers import Trainer, TrainingArguments
from datasets import load_dataset

device = 'cuda' if cuda.is_available() else 'cpu'

### Loading pre-trained model and tokenizer

In [3]:
model_path= 'bert-base-uncased'
tokenizer = BertTokenizerFast.from_pretrained(model_path)

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

## Loading popular IMDB dataset

In [None]:
# to take smaller portion 4K for train, 1K for test and 1K for validation
imdb_train= load_dataset('imdb', split="train[:2000]+train[-2000:]")
imdb_test= load_dataset('imdb', split="test[:500]+test[-500:]")
imdb_val= load_dataset('imdb', split="test[500:1000]+test[-1000:-500]")
imdb_train.shape, imdb_test.shape, imdb_val.shape

In [7]:
imdb_train.shape, imdb_test.shape, imdb_val.shape

((4000, 2), (1000, 2), (1000, 2))

In [8]:
def tokenize_it(e):
  return tokenizer(e['text'], 
                   padding=True, 
                   truncation=True)

enc_train=imdb_train.map(tokenize_it, batched=True, batch_size=1000)
enc_test=imdb_test.map(tokenize_it, batched=True, batch_size=1000) 
enc_val=imdb_val.map(tokenize_it, batched=True, batch_size=1000)

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]



# Loading model for fine-tuning

## Preparing training settings with TrainingArguments and Trainer class

TrainingArguments is the subset of the arguments we use in our example scripts **which relate to the training loop
itself**.

In [9]:
training_args = TrainingArguments(
    "/tmp",
    do_train=True,
    do_eval=True,
    num_train_epochs=3,
    learning_rate=2e-4,              
    per_device_train_batch_size=16,  
    per_device_eval_batch_size=16,
    warmup_steps=100,                
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    fp16=True,
    load_best_model_at_end=True
)

Let's design our evaluation metrics as follows:

In [10]:

def compute_acc(p):
  preds = np.argmax(p.predictions, axis=1)
  acc={"Accuracy": (preds == p.label_ids).mean()}
  return acc

# PEFT-adapters

In [11]:
from transformers import BertModelWithHeads
model = BertModelWithHeads\
    .from_pretrained(model_path)



Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModelWithHeads: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# we add an adapter and named imdb_sentiment
model.add_adapter("imdb_sentiment")
# we add a classification head and asscociate it with added adapter
model.add_classification_head(
    "imdb_sentiment",
    num_labels=2)

# we tell the training process that added adpater will be trained!
model.train_adapter("imdb_sentiment")

In [28]:
# we count them in Millions
trainable_params=model.num_parameters(only_trainable=True)/(2**20) 
all_params=model.num_parameters() /2**20
print(f"{all_params=:.2f} M\n"+
      f"{trainable_params=:.2f} M\n"+
      f"The efficiency ratio is \
      {100*trainable_params/all_params:.2f}%")

all_params=105.83 M
trainable_params=1.42 M
The efficiency ratio is       1.34%


In [19]:

trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=enc_train,
    eval_dataset=enc_val,
    compute_metrics=compute_acc,
)

Using cuda_amp half precision backend


In [20]:
trainer.train()

The following columns in the training set don't have a corresponding argument in `BertModelWithHeads.forward` and have been ignored: text. If text are not expected by `BertModelWithHeads.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4000
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 750
  Number of trainable parameters = 1486658


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.261302,0.89
2,0.317400,0.252638,0.895
3,0.317400,0.248115,0.911


The following columns in the evaluation set don't have a corresponding argument in `BertModelWithHeads.forward` and have been ignored: text. If text are not expected by `BertModelWithHeads.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 16
Saving model checkpoint to /tmp/checkpoint-250
Configuration saved in /tmp/checkpoint-250/imdb_sentiment/adapter_config.json
Module weights saved in /tmp/checkpoint-250/imdb_sentiment/pytorch_adapter.bin
Configuration saved in /tmp/checkpoint-250/imdb_sentiment/head_config.json
Module weights saved in /tmp/checkpoint-250/imdb_sentiment/pytorch_model_head.bin
Configuration saved in /tmp/checkpoint-250/imdb_sentiment/head_config.json
Module weights saved in /tmp/checkpoint-250/imdb_sentiment/pytorch_model_head.bin
The following columns in the evaluation set don't have a corresponding argument in `BertModelWithHeads.forward` and have been ignored: text. If text are not expected by `BertM

TrainOutput(global_step=750, training_loss=0.27304420979817706, metrics={'train_runtime': 98.1022, 'train_samples_per_second': 122.321, 'train_steps_per_second': 7.645, 'total_flos': 3212080128000000.0, 'train_loss': 0.27304420979817706, 'epoch': 3.0})

In [22]:
import pandas as pd
q=[trainer.evaluate(eval_dataset=data) for data in [enc_train, enc_val, enc_test]]
pd.DataFrame(q, index=["train","val","test"]).iloc[:,:5]

The following columns in the evaluation set don't have a corresponding argument in `BertModelWithHeads.forward` and have been ignored: text. If text are not expected by `BertModelWithHeads.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4000
  Batch size = 16


The following columns in the evaluation set don't have a corresponding argument in `BertModelWithHeads.forward` and have been ignored: text. If text are not expected by `BertModelWithHeads.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 16
The following columns in the evaluation set don't have a corresponding argument in `BertModelWithHeads.forward` and have been ignored: text. If text are not expected by `BertModelWithHeads.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 16


Unnamed: 0,eval_loss,eval_Accuracy,eval_runtime,eval_samples_per_second,eval_steps_per_second
train,0.151642,0.94575,13.0559,306.374,19.148
val,0.248115,0.911,3.248,307.885,19.397
test,0.21932,0.919,3.2568,307.052,19.344
