# Summary

Adapter model training and evaluation, modeled after the "Don't Stop Pretraining" paper.

# Setup

In [None]:
!pip install datasets
!pip install huggingface_hub
!pip install scikit-learn
!pip install transformers[torch]

Collecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: xxhash, dill, multiprocess, datasets
Successfully installed datase

In [None]:
!pip install -qq adapters

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m256.0/256.0 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m92.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)
from sklearn.metrics import accuracy_score,  f1_score

from datasets import load_dataset

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


Define the function that computes the evaluation metrics (accuracy and macro F1).

In [None]:
def compute_metrics(pred):
    """Score a batch of predictions with accuracy and macro-averaged F1.

    Args:
        pred: object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores; the arg-max over the last
            axis is taken as the predicted class).

    Returns:
        dict with the keys ``'accuracy'`` and ``'f1_macro'``.
    """
    true_labels = pred.label_ids
    predicted_labels = pred.predictions.argmax(-1)

    return {
        'accuracy': accuracy_score(true_labels, predicted_labels),
        # Macro averaging: per-class F1 scores are averaged without class weighting
        'f1_macro': f1_score(true_labels, predicted_labels, average='macro'),
    }

Load the tokenizer (RoBERTa base) and the padding data collator.

In [None]:
# Tokenizer for the roberta-base checkpoint.
# NOTE(review): truncation/padding are usually supplied when the tokenizer is
# called on text; confirm they have any effect when passed to from_pretrained.
tokenizer = AutoTokenizer.from_pretrained(
    "roberta-base",
    truncation=True,
    padding=True,
)

# Collator built around the tokenizer above.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
# Label settings: class index -> label name, and the inverse mapping derived from it
id2label = {0: "unhelpful", 1: "helpful"}
label2id = {label_name: class_index for class_index, label_name in id2label.items()}

In [None]:
# Set classifier settings on top of the checkpoint's configuration
classification_config = AutoConfig.from_pretrained("BigTMiami/test_model") # NEED TO CHANGE FOR REAL RUN
classification_config.classifier_dropout = 0.1 # From Paper
# `num_labels` is the standard config attribute name (was `num_of_labels`).
classification_config.num_labels = 2
# The label maps are assigned after num_labels so these are the final values.
classification_config.id2label = id2label
# No trailing comma here: a trailing comma would store a 1-tuple instead of the dict.
classification_config.label2id = label2id
classification_config

config.json:   0%|          | 0.00/672 [00:00<?, ?B/s]

RobertaConfig {
  "_name_or_path": "BigTMiami/test_model",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": 0.1,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "unhelpful",
    "1": "helpful"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": [
    {
      "helpful": 1,
      "unhelpful": 0
    }
  ],
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_of_labels": 2,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.36.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

# Model

In [None]:
from transformers import RobertaTokenizer
from transformers import RobertaConfig
from adapters import AutoAdapterModel

# Load the roberta-base checkpoint from Hugging Face into an adapter-capable
# model, configured with classification_config.
# TODO: load Tony's baseline model here instead.
model = AutoAdapterModel.from_pretrained("roberta-base", config=classification_config)

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['heads.default.3.bias', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# TODO: add adapters to the model
# Make sure to replicate the "Don't Stop Pretraining" setup


# Training

## DAPT

Continue pretraining RoBERTa on a large corpus of unlabeled,
domain-specific text (the Reviews dataset).

In [None]:
from datasets import load_dataset, DatasetDict

# Fixed seed so the train/validation/test partition is identical on every run.
SPLIT_SEED = 42

split = 'train[0:1000000]' # reduce the working size to speed up iteration

full_dataset = load_dataset("BigTMiami/amazon_25M_reviews_condensed", split=split)

# 90% train, 10% test + validation
train_testvalid = full_dataset.train_test_split(test_size=0.1, seed=SPLIT_SEED)

# Split the 10% test + valid in half test, half valid
test_valid = train_testvalid['test'].train_test_split(test_size=0.5, seed=SPLIT_SEED)

# Gather the three splits into a single DatasetDict
domain_dataset = DatasetDict({
    'train': train_testvalid['train'],
    'test': test_valid['test'],
    'validation': test_valid['train'],
})

Downloading readme:   0%|          | 0.00/573 [00:00<?, ?B/s]

Resolving data files:   0%|          | 0/58 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/58 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/158M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/17.8M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/18.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4309656 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/8360 [00:00<?, ? examples/s]

Generating val split:   0%|          | 0/8453 [00:00<?, ? examples/s]

In [None]:
# TODO: process the domain dataset into batch-sized chunks


In [None]:
# DAPT model: roberta-base checkpoint loaded with the classification config.
# The adapter and its classification head are registered under the same name.
dapt_adapter_name = "review_adapter"

dapt_model = AutoAdapterModel.from_pretrained("roberta-base", config=classification_config)

# New adapter using the "seq_bn" configuration
dapt_model.add_adapter(dapt_adapter_name, config="seq_bn")

# Two-label classification head registered under the adapter's name
dapt_model.add_classification_head(
    dapt_adapter_name,
    num_labels=2,
    id2label={0: "👎", 1: "👍"},
)

# Select the adapter for training
dapt_model.train_adapter(dapt_adapter_name)

In [None]:
# Training
import numpy as np
from transformers import TrainingArguments, EvalPrediction
from adapters import AdapterTrainer

# Hyperparameters for the DAPT adapter run, grouped by purpose.
training_args = TrainingArguments(
    # Output location
    output_dir="./adapter_dapt_reviews",
    overwrite_output_dir=True,
    # Batching
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=256,
    # Optimizer
    learning_rate=2e-5,
    weight_decay=0.01,
    adam_beta1=0.9,
    adam_beta2=0.98,
    adam_epsilon=1e-6,
    # Schedule
    num_train_epochs=100,
    lr_scheduler_type="linear",
    warmup_ratio=0.06,
    # masking_probability=0.15,
    # Logging
    logging_steps=200,
    # Important: keeps the dataset columns so the labels are properly passed to the model
    remove_unused_columns=False,
)

def compute_accuracy(p: EvalPrediction):
  """Return ``{"acc": ...}``: the fraction of arg-max predictions equal to the labels.

  The predicted class is the arg-max of ``p.predictions`` along axis 1, which is
  compared element-wise with ``p.label_ids``.
  """
  predicted_classes = np.argmax(p.predictions, axis=1)
  is_correct = predicted_classes == p.label_ids
  return {"acc": is_correct.mean()}

# Trainer for the DAPT run: trains on the "train" split of the domain dataset,
# evaluates on its "validation" split, and reports compute_accuracy.
dapt_trainer = AdapterTrainer(
    args=training_args,
    model=dapt_model,
    compute_metrics=compute_accuracy,
    eval_dataset=domain_dataset["validation"],
    train_dataset=domain_dataset["train"],
)

In [None]:
# Run DAPT training with the trainer configured above; the call's return value is
# the cell's displayed output.
dapt_trainer.train()

In [None]:
# Evaluate on the eval_dataset given to the trainer (the "validation" split of domain_dataset)
dapt_trainer.evaluate()

In [None]:
# Log in to the Hugging Face Hub from the notebook; run before the push_to_hub call below
from huggingface_hub import notebook_login

notebook_login()

In [None]:
# Push the DAPT trainer's results to the Hugging Face Hub.
# NOTE(review): the first positional parameter of Trainer.push_to_hub appears to be
# the commit message, not the repository name — confirm this string ends up where intended.
dapt_trainer.push_to_hub("RobertaAdapter_reviews_DAPT")

## TAPT

In [None]:
# Load the task dataset; the cells below use its "train" and "dev" splits
task_dataset = load_dataset("BigTMiami/amazon_helpfulness")

Downloading readme:   0%|          | 0.00/613 [00:00<?, ?B/s]



Downloading data:   0%|          | 0.00/40.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/8.82M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.78M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating dev split: 0 examples [00:00, ? examples/s]

In [None]:
# One name shared by the adapter and its classification head, so the head really
# is the "matching" one (the head was previously registered under a different name).
TASK_ADAPTER_NAME = "task_review_helpfulness_adapter"

task_model = AutoAdapterModel.from_pretrained(
    "roberta-base",
    config=classification_config,
)

# Add a new adapter to model
task_model.add_adapter(TASK_ADAPTER_NAME, config="seq_bn")

# Add a matching classification head (same name as the adapter)
task_model.add_classification_head(
    TASK_ADAPTER_NAME,
    num_labels=2,
    id2label={0: "👎", 1: "👍"},
)

# Activate the adapter
task_model.train_adapter(TASK_ADAPTER_NAME)

Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['heads.default.3.bias', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Use Adapter Training Class
import numpy as np
from transformers import TrainingArguments, EvalPrediction
from adapters import AdapterTrainer

# Hyperparameters for the TAPT adapter run, grouped by purpose.
training_args = TrainingArguments(
    # Output location
    output_dir="./adapter_tapt_helpfulness",
    overwrite_output_dir=True,
    # Batching
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=256,
    # Optimizer
    learning_rate=2e-5,
    weight_decay=0.01,
    adam_beta1=0.9,
    adam_beta2=0.98,
    adam_epsilon=1e-6,
    # Schedule
    num_train_epochs=100,
    lr_scheduler_type="linear",
    warmup_ratio=0.06,
    # masking_probability=0.15,
    # Logging
    logging_steps=200,
    # Important: keeps the dataset columns so the labels are properly passed to the model
    remove_unused_columns=False,
)

def compute_accuracy(p: EvalPrediction):
  """Return ``{"acc": ...}``: the fraction of arg-max predictions equal to the labels.

  The predicted class is the arg-max of ``p.predictions`` along axis 1, which is
  compared element-wise with ``p.label_ids``.
  """
  predicted_classes = np.argmax(p.predictions, axis=1)
  is_correct = predicted_classes == p.label_ids
  return {"acc": is_correct.mean()}

# Trainer for the TAPT run: trains on the "train" split of the task dataset,
# evaluates on its "dev" split, and reports compute_accuracy.
task_trainer = AdapterTrainer(
    model=task_model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    train_dataset=task_dataset["train"],
    eval_dataset=task_dataset["dev"],
    compute_metrics=compute_accuracy,
)

# The former `dataloader_config = DataLoaderConfiguration(...)` line was dropped:
# DataLoaderConfiguration is never imported in this notebook (NameError on a fresh
# kernel) and the resulting variable was not used anywhere.


In [None]:
# Run TAPT training with the task trainer configured above; the call's return value
# is the cell's displayed output.
task_trainer.train()

Step,Training Loss


Step,Training Loss


In [None]:
# Evaluate on the eval_dataset given to the trainer (the "dev" split of task_dataset)
task_trainer.evaluate()

In [None]:
# Log in to the Hugging Face Hub from the notebook; run before the push_to_hub call below.
# NOTE(review): the same login already appears in the DAPT section — this repeat is
# presumably only needed when that section is skipped.
from huggingface_hub import notebook_login

notebook_login()

In [None]:
# Push the TAPT trainer's results to the Hugging Face Hub.
# NOTE(review): the first positional parameter of Trainer.push_to_hub appears to be
# the commit message, not the repository name — confirm this string ends up where intended.
task_trainer.push_to_hub("RobertaAdapter_helpfulness_TAPT")

In [None]:
# Save the trained model through the trainer.
# NOTE(review): the argument looks like an output directory, so this presumably
# writes to disk rather than uploading to the Hugging Face Hub — confirm.
task_trainer.save_model("RobertaAdapter_helpfulnessModel")

# Evaluation

In [None]:
from transformers import TextClassificationPipeline

# Build a text-classification pipeline around the trained task model, placed on
# the device index reported by the training arguments.
device_index = training_args.device.index
classifier = TextClassificationPipeline(
    model=task_model,
    tokenizer=tokenizer,
    device=device_index,
)

# Smoke test on a single example
classifier("This is awesome!")

In [None]:
# Save trained adapter
task_model.save_adapter("./test_adapter1", "rotten_tomatoes")

!ls -lh final_adapter