In [14]:
!pip install datasets
!pip install transformers[torch]
!pip install adapters
!pip install scikit-learn



In [15]:
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    """Score a batch of predictions with accuracy and macro-averaged F1.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (an array whose last axis is reduced with
            ``argmax`` to obtain the predicted class ids).

    Returns:
        dict with two entries: ``'accuracy'`` and ``'f1_macro'``.
    """
    y_true = pred.label_ids
    # Predicted class = index of the highest score along the last axis.
    y_pred = pred.predictions.argmax(-1)

    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1_macro': f1_score(y_true, y_pred, average='macro'),
    }

In [16]:
from datasets import load_dataset
# Hub identifier of the dataset; it is also passed as `datasets_tag` when the
# trained adapters are pushed to the Hub.
dataset_name = "BigTMiami/amazon_helpfulness"
# The "train", "dev" and "test" entries of this object are used by the cells below.
dataset = load_dataset(dataset_name)


Repo card metadata block was not found. Setting CardData to empty.


In [17]:
from transformers import AutoTokenizer, DataCollatorWithPadding

# Tokenizer of the "roberta-base" checkpoint, the same checkpoint the models
# below are loaded from.
tokenizer = AutoTokenizer.from_pretrained("roberta-base")
# Padding collator built around that tokenizer; handed to the trainers as `data_collator`.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# UniPELT adapter, no pretraining, lr=1e-4

In [5]:
from huggingface_hub import notebook_login

# Authenticate against the Hugging Face Hub via the notebook widget; the cells
# below push to the Hub (push_to_hub=True and push_adapter_to_hub).
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [6]:
from transformers import RobertaConfig
from adapters import AutoAdapterModel

# Load the "roberta-base" configuration and build an adapter-capable model from
# the same checkpoint. This gives the UniPELT experiment a freshly loaded base model.
config = RobertaConfig.from_pretrained("roberta-base")
model = AutoAdapterModel.from_pretrained(
    "roberta-base",
    config=config,
)

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['heads.default.3.bias', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Names for this experiment:
#   adapter_hub_name - repository name used when the adapter is pushed to the Hub
#   adapter_name     - name under which the adapter and its head are registered on the model
#   adapter_type     - adapter configuration identifier passed to add_adapter
adapter_hub_name = "unipelt_adapter_classification_noPre_lr0_0001"
adapter_name = "classifier_unipelt_no_pretraining"
adapter_type = "unipelt" # could be "lora", etc.

# Add a new adapter of the chosen type to the model
model.add_adapter(adapter_name, config=adapter_type)

# Add a two-class classification head registered under the same name as the adapter
model.add_classification_head(
    adapter_name,
    num_labels=2,
    id2label={ 0: "unhelpful", 1: "helpful"})

# Set the adapter to be used for training
model.train_adapter(adapter_name)

In [8]:
# Build the model's adapter summary and show it as the cell output.
summary = model.adapter_summary()
summary



In [9]:
from transformers import TrainingArguments, EarlyStoppingCallback
from adapters import AdapterTrainer

# Training configuration for the UniPELT classification adapter.
# Values tagged "Paper" are the ones the original notes attribute to the paper.
training_args = TrainingArguments(
    output_dir="./adapter_unipelt_NoPre0_0001_classifier_training_output",
    overwrite_output_dir=True,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=1e-4, # Paper: this is for Classification, not domain training
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Upper bound on epochs; early stopping (below) may end training sooner.
    # NOTE(review): earlier notes said this would be changed back to 3, but the
    # value in effect is 10 and the run recorded in this notebook trained 10 epochs.
    num_train_epochs=10,
    weight_decay=0.01,
    warmup_ratio=0.06, # Paper: warmup proportion of 0.06
    adam_epsilon=1e-6, # Paper 1e-6 (huggingface default 1e-08)
    adam_beta1=0.9, # Paper: Adam weights 0.9
    adam_beta2=0.98, # Paper: Adam weights 0.98 (huggingface default  0.999)
    lr_scheduler_type="linear",
    save_total_limit=2, # Keep at most 2 checkpoints in output_dir
    push_to_hub=True,
    # "checkpoint" pushes on every save and additionally uploads the latest
    # checkpoint to a "last-checkpoint" folder; it does not wait for the end of training.
    hub_strategy="checkpoint",
    # Must stay True while EarlyStoppingCallback is used: the callback relies on
    # best-model tracking.
    # NOTE(review): earlier notes said this would be set back to False to keep the
    # last trained model like the paper; that conflicts with early stopping - confirm intent.
    load_best_model_at_end=True,
    # torch_compile=torch_compile,  # Much Faster
    logging_strategy="steps", # Is default
    logging_steps=100, # Logs training progress
    metric_for_best_model='f1_macro' # key returned by compute_metrics
)

# Stop when the tracked metric has not improved for 3 consecutive evaluations.
early_stopping = EarlyStoppingCallback(early_stopping_patience=3) # from paper

# Trainer for the UniPELT adapter: trains on "train", evaluates each epoch on "dev".
uni_no_trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["dev"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping]
)


In [10]:
# Baseline: score the model on the "test" split before train() has been called
# (the recorded output for this cell shows an accuracy of 0.1466).
eval_results = uni_no_trainer.evaluate(dataset["test"])
eval_results

You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'eval_loss': 0.7912551760673523,
 'eval_accuracy': 0.1466,
 'eval_f1_macro': 0.12785627071341357,
 'eval_runtime': 262.4125,
 'eval_samples_per_second': 95.27,
 'eval_steps_per_second': 5.956}

In [11]:
# Run fine-tuning with the trainer configured above; the per-epoch table below
# reports training loss, validation loss, accuracy and macro F1.
training_results = uni_no_trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,0.3434,0.328372,0.8666,0.572567
2,0.3187,0.3216,0.8694,0.610292
3,0.3025,0.307457,0.8722,0.673906
4,0.301,0.31425,0.8748,0.681806
5,0.2858,0.309325,0.873,0.685896
6,0.2832,0.332634,0.8742,0.6471
7,0.2647,0.339953,0.8748,0.681806
8,0.2617,0.329975,0.8742,0.68791
9,0.2333,0.336477,0.8684,0.693721
10,0.207,0.343772,0.8714,0.699221




In [12]:
# Score the trained model on the "test" split. This overwrites the pre-training
# baseline stored in `eval_results`.
# NOTE(review): with load_best_model_at_end=True this presumably evaluates the
# best checkpoint by f1_macro rather than the last one - confirm.
eval_results = uni_no_trainer.evaluate(dataset["test"])
eval_results

{'eval_loss': 0.3387288749217987,
 'eval_accuracy': 0.87504,
 'eval_f1_macro': 0.6988615145308612,
 'eval_runtime': 260.87,
 'eval_samples_per_second': 95.833,
 'eval_steps_per_second': 5.991,
 'epoch': 10.0}

In [13]:
# Upload the adapter registered as `adapter_name` to the Hub repository
# `adapter_hub_name`, tagging it with the dataset it was trained on.
model.push_adapter_to_hub(
    adapter_hub_name,
    adapter_name,
    datasets_tag=dataset_name
)

pytorch_model_head.bin:   0%|          | 0.00/2.37M [00:00<?, ?B/s]

pytorch_adapter.bin:   0%|          | 0.00/44.4M [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

CommitInfo(commit_url='https://huggingface.co/jgrc3/unipelt_adapter_classification_noPre_lr0_0001/commit/b3f1efedccef2bab2075aae8e7ee45c6359bdb8a', commit_message='Upload model', commit_description='', oid='b3f1efedccef2bab2075aae8e7ee45c6359bdb8a', pr_url=None, pr_revision=None, pr_num=None)

# Pfeiffer adapter, no pretraining, lr=1e-4

In [18]:
from huggingface_hub import notebook_login

# Authenticate against the Hugging Face Hub again before the Pfeiffer run; the
# cells below push to the Hub (push_to_hub=True and push_adapter_to_hub).
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [19]:
from transformers import RobertaConfig
from adapters import AutoAdapterModel

# Reload "roberta-base" from the checkpoint and rebind `model`, so the Pfeiffer
# adapter is added to a freshly loaded model rather than the one trained above.
config = RobertaConfig.from_pretrained("roberta-base")
model = AutoAdapterModel.from_pretrained(
    "roberta-base",
    config=config,
)

Some weights of RobertaAdapterModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['heads.default.3.bias', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
# Names for this experiment (these rebind the variables used by the UniPELT section):
#   adapter_hub_name - repository name used when the adapter is pushed to the Hub
#   adapter_name     - name under which the adapter and its head are registered on the model
#   adapter_type     - adapter configuration identifier passed to add_adapter
adapter_hub_name = "pfeiffer_adapter_classification_noPre_lr0_0001"
adapter_name = "classifier_pfeiffer_no_pretraining"
adapter_type = "pfeiffer" # could be "lora", etc.

# Add a new adapter of the chosen type to the model
model.add_adapter(adapter_name, config=adapter_type)

# Add a two-class classification head registered under the same name as the adapter
model.add_classification_head(
    adapter_name,
    num_labels=2,
    id2label={ 0: "unhelpful", 1: "helpful"})

# Set the adapter to be used for training
model.train_adapter(adapter_name)

In [21]:
# Build the model's adapter summary and show it as the cell output.
summary = model.adapter_summary()
summary



In [22]:
from transformers import TrainingArguments, EarlyStoppingCallback
from adapters import AdapterTrainer

# Training configuration for the Pfeiffer classification adapter.
# Values tagged "Paper" are the ones the original notes attribute to the paper.
training_args = TrainingArguments(
    output_dir="./adapter_pfeiffer_NoPre0_0001_classifier_training_output",
    overwrite_output_dir=True,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=1e-4, # Paper: this is for Classification, not domain training
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Upper bound on epochs; early stopping (below) may end training sooner.
    # NOTE(review): earlier notes said this would be changed back to 3, but the
    # value in effect is 10.
    num_train_epochs=10,
    weight_decay=0.01,
    warmup_ratio=0.06, # Paper: warmup proportion of 0.06
    adam_epsilon=1e-6, # Paper 1e-6 (huggingface default 1e-08)
    adam_beta1=0.9, # Paper: Adam weights 0.9
    adam_beta2=0.98, # Paper: Adam weights 0.98 (huggingface default  0.999)
    lr_scheduler_type="linear",
    save_total_limit=2, # Keep at most 2 checkpoints in output_dir
    push_to_hub=True,
    # "checkpoint" pushes on every save and additionally uploads the latest
    # checkpoint to a "last-checkpoint" folder; it does not wait for the end of training.
    hub_strategy="checkpoint",
    # Must stay True while EarlyStoppingCallback is used: the callback relies on
    # best-model tracking.
    # NOTE(review): earlier notes said this would be set back to False to keep the
    # last trained model like the paper; that conflicts with early stopping - confirm intent.
    load_best_model_at_end=True,
    # torch_compile=torch_compile,  # Much Faster
    logging_strategy="steps", # Is default
    logging_steps=100, # Logs training progress
    metric_for_best_model='f1_macro' # key returned by compute_metrics
)

# Stop when the tracked metric has not improved for 3 consecutive evaluations.
early_stopping = EarlyStoppingCallback(early_stopping_patience=3) # from paper

# Trainer for the Pfeiffer adapter: trains on "train", evaluates each epoch on "dev".
# The variable keeps the name used in the UniPELT section because the cells
# below refer to it as `uni_no_trainer`.
uni_no_trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["dev"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping]
)


In [None]:
# Baseline: score the model on the "test" split before train() has been called.
eval_results = uni_no_trainer.evaluate(dataset["test"])
eval_results

You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [None]:
# Run fine-tuning of the Pfeiffer adapter with the trainer configured above.
training_results = uni_no_trainer.train()

In [None]:
# Score the trained model on the "test" split. This overwrites the pre-training
# baseline stored in `eval_results`.
# NOTE(review): with load_best_model_at_end=True this presumably evaluates the
# best checkpoint by f1_macro rather than the last one - confirm.
eval_results = uni_no_trainer.evaluate(dataset["test"])
eval_results

In [None]:
# Upload the adapter registered as `adapter_name` to the Hub repository
# `adapter_hub_name`, tagging it with the dataset it was trained on.
model.push_adapter_to_hub(
    adapter_hub_name,
    adapter_name,
    datasets_tag=dataset_name
)