In [1]:
!pip install -U adapter-transformers
!pip install datasets

Collecting adapter-transformers
  Downloading adapter_transformers-3.2.1-py3-none-any.whl (6.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.4/6.4 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0 (from adapter-transformers)
  Downloading huggingface_hub-0.17.1-py3-none-any.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.8/294.8 kB[0m [31m29.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from adapter-transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, adapter-transformers
Successfully installed adapter-transformers-3.2.1 huggingface-hub-0.17.1 tokenizers-0.13.3
Collecting datasets
  Downloading datasets-2.14.5-py3-none-any.whl

In [2]:
import torch
import pandas as pd
import re
import pickle
import os
from torch.utils.data import Dataset, TensorDataset, DataLoader, SequentialSampler, RandomSampler
from torch.nn.utils.rnn import pad_sequence
from transformers import BertTokenizer
from transformers.adapters import PrefixTuningConfig, LoRAConfig, AdapterConfig, MAMConfig, ConfigUnion, ParallelConfig

from datasets import load_dataset
from torch.utils.data import Dataset, TensorDataset, DataLoader, SequentialSampler, RandomSampler
from transformers import RobertaTokenizer, BertTokenizer, AutoTokenizer, AutoAdapterModel
from transformers import RobertaConfig, BertConfig, BertModel

import numpy as np
from transformers import TrainingArguments, AdapterTrainer, EvalPrediction
trainDataset, validation_matchedDataset, validation_mismatchedDataset = load_dataset("multi_nli", split=['train', 'validation_matched', 'validation_mismatched'])

# trainDataset, validation_matchedDataset, validation_mismatchedDataset = load_dataset("multi_nli", split=['train[:2000]', 'validation_matched', 'validation_mismatched'])
# train_10_20_ds = datasets.load_dataset('bookcorpus', split='train[10:20]')

Downloading builder script:   0%|          | 0.00/5.14k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.88k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/8.67k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/227M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/392702 [00:00<?, ? examples/s]

Generating validation_matched split:   0%|          | 0/9815 [00:00<?, ? examples/s]

Generating validation_mismatched split:   0%|          | 0/9832 [00:00<?, ? examples/s]

In [3]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
def encode_batch(batch):
  """Encodes a batch of input data using the model tokenizer."""
  return tokenizer(
      batch["premise"],
      batch["hypothesis"],
      max_length=256,
      truncation=True,
      padding="max_length"
  )
trainDataset = trainDataset.map(encode_batch, batched=True)
validation_matchedDataset = validation_matchedDataset.map(encode_batch, batched=True)
validation_mismatchedDataset = validation_mismatchedDataset.map(encode_batch, batched=True)

trainDataset = trainDataset.rename_column("label", "labels")
validation_matchedDataset = validation_matchedDataset.rename_column("label", "labels")
validation_mismatchedDataset = validation_mismatchedDataset.rename_column("label", "labels")

trainDataset.set_format(type='torch', columns = ['input_ids', 'attention_mask', 'labels'])
validation_matchedDataset.set_format(type='torch', columns = ['input_ids', 'attention_mask', 'labels'])
validation_mismatchedDataset.set_format(type='torch', columns = ['input_ids', 'attention_mask', 'labels'])


Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Map:   0%|          | 0/392702 [00:00<?, ? examples/s]

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

Map:   0%|          | 0/9815 [00:00<?, ? examples/s]

Map:   0%|          | 0/9832 [00:00<?, ? examples/s]

In [4]:
config_bert = BertConfig.from_pretrained("bert-base-uncased", num_labels=3) # what is bertconfig
config_LoRA = LoRAConfig(r=8, alpha=16)
config_bottleneck = AdapterConfig(mh_adapter=True, output_adapter=True, reduction_factor=16, non_linearity="relu")
config_prefix = PrefixTuningConfig(flat=False, prefix_length=30)

config_mixmax = ConfigUnion(
    PrefixTuningConfig(bottleneck_size=800),
    ParallelConfig(),
)




def compute_accuracy(p: EvalPrediction):
  preds = np.argmax(p.predictions, axis=1)
  return {"acc": (preds == p.label_ids).mean()}


In [None]:
# "id2label":{
#       "entailment":"0",
#       "contradiction": "1",
#       "neutral":"2"
#     },

#LoRA

In [None]:
training_args = TrainingArguments(
    learning_rate=1e-4,
    num_train_epochs=1,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    logging_steps=200,
    output_dir="./training_outputLoRA",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=False,
)

In [None]:
# train second
model = AutoAdapterModel.from_pretrained("bert-base-uncased", config=config_bert)
# model.add_classification_head("mnli_lora")
adapterLoRA = model.load_adapter("/content/AdapterModelLora", config = config_LoRA)
model.set_active_adapters(adapterLoRA)
print('Training...LoRA')
model.train_adapter(adapterLoRA)
trainer = AdapterTrainer(
  model=model,
  args=training_args,
  train_dataset = trainDataset,
  eval_dataset=validation_matchedDataset,
  compute_metrics=compute_accuracy)
trainer.train()
model.save_adapter("./AdapterModelLora_second/", "mnli_lora", with_head=True)

# model = AutoAdapterModel.from_pretrained("bert-base-uncased", config=config_bert)
# model.add_adapter("mnli_lora", config = config_LoRA)
# model.add_classification_head("mnli_lora", num_labels=3, id2label={'entailment': 0, 'contradiction': 1, 'neutral': 2})
# model.train_adapter("mnli_lora")
# model.set_active_adapters("mnli_lora")
# print('Training...LoRA')
# trainer = AdapterTrainer(
#   model=model,
#   args=training_args,
#   train_dataset = trainDataset,
#   eval_dataset=validation_matchedDataset,
#   compute_metrics=compute_accuracy)
# trainer.train()
# model.save_adapter("./AdapterModelLora/", "mnli_lora", with_head=True)

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertAdapterModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training...LoRA


***** Running training *****
  Num examples = 392702
  Num Epochs = 1
  Instantaneous batch size per device = 64
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 1
  Total optimization steps = 6136
  Number of trainable parameters = 887811


Step,Training Loss
200,0.7737
400,0.7509
600,0.7378
800,0.7274
1000,0.6969
1200,0.6955
1400,0.7035
1600,0.6892
1800,0.6834
2000,0.6719


Saving model checkpoint to ./training_output/checkpoint-500
Configuration saved in ./training_output/checkpoint-500/mnli_lora/adapter_config.json
Module weights saved in ./training_output/checkpoint-500/mnli_lora/pytorch_adapter.bin
Configuration saved in ./training_output/checkpoint-500/mnli_lora/head_config.json
Module weights saved in ./training_output/checkpoint-500/mnli_lora/pytorch_model_head.bin
Configuration saved in ./training_output/checkpoint-500/mnli_lora/head_config.json
Module weights saved in ./training_output/checkpoint-500/mnli_lora/pytorch_model_head.bin
Saving model checkpoint to ./training_output/checkpoint-1000
Configuration saved in ./training_output/checkpoint-1000/mnli_lora/adapter_config.json
Module weights saved in ./training_output/checkpoint-1000/mnli_lora/pytorch_adapter.bin
Configuration saved in ./training_output/checkpoint-1000/mnli_lora/head_config.json
Module weights saved in ./training_output/checkpoint-1000/mnli_lora/pytorch_model_head.bin
Configurat

In [None]:
print('Evaluating on match...')
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 9815
  Batch size = 64


Evaluating on match...


{'eval_loss': 0.7376929521560669,
 'eval_acc': 0.6730514518593989,
 'eval_runtime': 150.7443,
 'eval_samples_per_second': 65.11,
 'eval_steps_per_second': 1.022,
 'epoch': 1.0}

In [None]:
print('Evaluating on mismatch...')
trainer = AdapterTrainer(
  model=model,
  args=training_args,
  eval_dataset=validation_mismatchedDataset,
  compute_metrics=compute_accuracy)
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 9832
  Batch size = 64


Evaluating on mismatch...


{'eval_loss': 0.7056222558021545,
 'eval_acc': 0.6942636289666395,
 'eval_runtime': 150.8513,
 'eval_samples_per_second': 65.177,
 'eval_steps_per_second': 1.021}

# Prefix

In [5]:
# train second
training_args = TrainingArguments(
    learning_rate=8e-5,
    num_train_epochs=1,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    logging_steps=200,
    output_dir="./training_outputPrefix",
    overwrite_output_dir=True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns=False,
)


model = AutoAdapterModel.from_pretrained("bert-base-uncased", config=config_bert)
adapterPrefix = model.load_adapter("/content/AdapterModelPrefix", config = config_prefix)
model.set_active_adapters(adapterPrefix)
print('Training...Prefix')
model.train_adapter(adapterPrefix)
trainer = AdapterTrainer(
  model=model,
  args=training_args,
  train_dataset = trainDataset,
  eval_dataset=validation_matchedDataset,
  compute_metrics=compute_accuracy)
trainer.train()
model.save_adapter("./AdapterModelPrefix_second/", "mnli_prefix", with_head=True)

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertAdapterModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Training...Prefix


***** Running training *****
  Num examples = 392702
  Num Epochs = 1
  Instantaneous batch size per device = 64
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 1
  Total optimization steps = 6136
  Number of trainable parameters = 10465283


Step,Training Loss
200,0.6352
400,0.6374
600,0.6264
800,0.6273
1000,0.61
1200,0.6195
1400,0.6234
1600,0.6114
1800,0.606
2000,0.5989


Saving model checkpoint to ./training_outputPrefix/checkpoint-500
Configuration saved in ./training_outputPrefix/checkpoint-500/mnli_prefix/adapter_config.json
Module weights saved in ./training_outputPrefix/checkpoint-500/mnli_prefix/pytorch_adapter.bin
Configuration saved in ./training_outputPrefix/checkpoint-500/mnli_prefix/head_config.json
Module weights saved in ./training_outputPrefix/checkpoint-500/mnli_prefix/pytorch_model_head.bin
Configuration saved in ./training_outputPrefix/checkpoint-500/mnli_prefix/head_config.json
Module weights saved in ./training_outputPrefix/checkpoint-500/mnli_prefix/pytorch_model_head.bin
Saving model checkpoint to ./training_outputPrefix/checkpoint-1000
Configuration saved in ./training_outputPrefix/checkpoint-1000/mnli_prefix/adapter_config.json
Module weights saved in ./training_outputPrefix/checkpoint-1000/mnli_prefix/pytorch_adapter.bin
Configuration saved in ./training_outputPrefix/checkpoint-1000/mnli_prefix/head_config.json
Module weights sa

In [16]:
model = AutoAdapterModel.from_pretrained("bert-base-uncased", config=config_bert)
adapterLoRA = model.load_adapter("/content/AdapterModelPrefix_second", config = config_LoRA)
trainer = AdapterTrainer(
  model=model,
  args=training_args,
  eval_dataset=validation_matchedDataset,
  compute_metrics=compute_accuracy)
print('Evaluating on match...')
trainer.evaluate()

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/pytorch_model.bin
Generate config GenerationConfig {
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Adding head 'mnli_prefix' with config {'head_type': 'classification', 'num_labels': 3, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'0': 'entailment', '1': 'contradiction', '2': 'neutral'}, 'use_pooler': False, 'bias': True}.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertAdapterModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertAdapterModel from the checkpoint of a model t

Evaluating on match...


{'eval_loss': 0.5367374420166016,
 'eval_acc': 0.7832908813041264,
 'eval_runtime': 169.8051,
 'eval_samples_per_second': 57.802,
 'eval_steps_per_second': 0.907}

In [15]:
print('Evaluating on mismatch...')
trainer = AdapterTrainer(
  model=model,
  args=training_args,
  eval_dataset=validation_mismatchedDataset,
  compute_metrics=compute_accuracy)
trainer.evaluate()

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--bert-base-uncased/snapshots/1dbc166cf8765166998eff31ade2eb64c8a40076/pytorch_model.bin
Generate config GenerationConfig {
  "pad_token_id": 0,
  "transformers_version": "4.26.1"
}

Adding head 'mnli_prefix' with config {'head_type': 'classification', 'num_labels': 3, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'0': 'entailment', '1': 'contradiction', '2': 'neutral'}, 'use_pooler': False, 'bias': True}.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertAdapterModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertAdapterModel from the checkpoint of a model t

Evaluating on mismatch...


{'eval_loss': 0.52068692445755,
 'eval_acc': 0.7943449959316518,
 'eval_runtime': 168.0202,
 'eval_samples_per_second': 58.517,
 'eval_steps_per_second': 0.917}