# Training the adapters

In [None]:
!pip install adapters




















In [None]:
!pip install transformers[torch]








































In [None]:
!pip install datasets

Collecting datasets

  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m





Collecting dill<0.3.9,>=0.3.0 (from datasets)

  Downloading dill-0.3.8-py3-none-any.whl (116 kB)

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m




Collecting xxhash (from datasets)

  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m

[?25hCollecting multiprocess (from datasets)

  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m



Collecting huggingface-hub>=0.21.2 (from datasets)

  Downloading huggingface_h

In [None]:
from transformers import BertTokenizer, TrainingArguments, TrainerCallback
from adapters import AutoAdapterModel
import adapters
from datasets import load_dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np

# Define the task
task = "NLI"  # Change this to "SentimentAnalysis" for sentiment analysis task

print("Loading model and tokenizer...")
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = AutoAdapterModel.from_pretrained("bert-base-uncased")

print("Loading adapter...")
# The pre-trained adapter is read from a task-dependent location; only the
# adapter weights are loaded (with_head=False), a fresh head is added below.
adapter_path = "/content/Adapter" if task == "NLI" else "/kaggle/working/Adapter"
adapter = model.load_adapter(adapter_path, with_head=False)

print("Adding new classification head...")
# Add a new classification head for the new adapter
# (3 output labels for the NLI task, 2 for the sentiment task).
num_labels = 3 if task == "NLI" else 2
model.add_classification_head(f"{task}_Head", num_labels=num_labels)

model.set_active_adapters(adapter)

print("Loading datasets...")
if task == "NLI":
    # load maximedb/sick_nl dataset
    train_dataset = load_dataset("maximedb/sick_nl", split="train")
    test_dataset = load_dataset("maximedb/sick_nl", split="test")
    validation_dataset = load_dataset("maximedb/sick_nl", split="validation")
    columns_to_keep = ['sentence_A', 'sentence_B', 'label']
else:
    # Load DBRD dataset
    train_dataset = load_dataset("dbrd", split="train")
    test_dataset = load_dataset("dbrd", split="test")
    # DBRD is only loaded with train/test splits here, so 10% of the training
    # data is held out for validation. The seed makes the split reproducible
    # across kernel restarts.
    train_valid_split = train_dataset.train_test_split(test_size=0.1, seed=42)
    train_dataset = train_valid_split['train']
    validation_dataset = train_valid_split['test']
    columns_to_keep = ['text', 'label']

# Get the column names
column_names = train_dataset.column_names
columns_to_remove = [col for col in column_names if col not in columns_to_keep]

# Remove unnecessary columns
train_dataset = train_dataset.remove_columns(columns_to_remove)
test_dataset = test_dataset.remove_columns(columns_to_remove)
validation_dataset = validation_dataset.remove_columns(columns_to_remove)

# Tokenize the datasets and keep the 'labels' field
print("Tokenizing datasets...")
if task == "NLI":
    def encode_batch(batch):
        """Tokenize a batch of sentence pairs and copy ``label`` to ``labels``."""
        # truncation=True guards against pairs longer than the model's maximum
        # input length; shorter pairs are tokenized exactly as before.
        encoded = tokenizer(batch['sentence_A'], batch['sentence_B'], truncation=True)
        return {**encoded, "labels": batch["label"]}
else:
    max_length = 512

    def encode_batch(batch):
        """Tokenize a batch of texts, truncated and padded to ``max_length``."""
        return tokenizer(batch['text'], truncation=True, padding='max_length', max_length=max_length)

# The same encoding function is applied to every split so they cannot drift apart.
train_dataset = train_dataset.map(encode_batch, batched=True)
test_dataset = test_dataset.map(encode_batch, batched=True)
validation_dataset = validation_dataset.map(encode_batch, batched=True)

def compute_metrics(eval_pred):
    """Compute weighted classification metrics for one evaluation pass.

    Args:
        eval_pred: An iterable that unpacks into ``(predictions, labels)``.
            ``predictions`` holds per-class scores (or a tuple whose first
            element does); ``labels`` holds the reference class ids.

    Returns:
        dict: ``accuracy``, ``f1``, ``precision`` and ``recall``, where the
        last three are weighted averages over the classes.
    """
    predictions, labels = eval_pred
    # If several outputs are returned as a tuple, the class scores are assumed
    # to be the first element (the usual Hugging Face convention).
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.argmax(predictions, axis=-1)

    # zero_division=0 returns the same value as the default for classes that
    # are never predicted, but without emitting a warning on every evaluation.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='weighted', zero_division=0
    )
    acc = accuracy_score(labels, predictions)

    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# Custom callback to save the best model based on F1 score and evaluate the best model
class SaveBestModelCallback(TrainerCallback):
    """Save the adapter whenever the validation F1 improves during training.

    After each save, the freshly saved model is also scored on the test set
    and the result is printed.

    The callback relies on the module-level names ``model``, ``adapter`` and
    ``trainer`` being defined by the time training starts.

    Args:
        test_dataset: Dataset evaluated each time a new best model is saved.
        save_path: Directory the best adapter (with head) is written to.
            Defaults to the path originally hard-coded in this notebook.
    """

    def __init__(self, test_dataset, save_path="DutchAdapter\\NLU prompt 4 babylm\\FineTune NLI\\best_model"):
        self.best_f1 = 0.0
        self.test_dataset = test_dataset
        self.save_path = save_path
        # True while the nested test-set evaluation is running.
        self.testing = False
        # True between on_train_begin and on_train_end.
        self.training = False

    def on_train_begin(self, args, state, control, **kwargs):
        """Mark the start of training so evaluations count towards model selection."""
        self.training = True

    def on_train_end(self, args, state, control, **kwargs):
        """Mark the end of training; later evaluations must not overwrite the best model."""
        self.training = False

    def on_evaluate(self, args, state, control, **kwargs):
        """Save the adapter if this validation pass produced a new best F1."""
        # Ignore the nested test-set evaluation triggered below, and any
        # evaluation run outside of training (e.g. a final test-set evaluation):
        # otherwise a test-set F1 would be compared with validation scores and
        # the saved "best" model could be replaced by the last-epoch model.
        if self.testing or not self.training:
            return

        metrics = kwargs.get('metrics')
        if metrics is None:
            print("No metrics found.")
            return

        current_f1 = metrics.get('eval_f1')
        if current_f1 is None or current_f1 <= self.best_f1:
            return

        self.best_f1 = current_f1
        print(f"New best F1 score: {current_f1}. Saving model...")
        model.save_adapter(self.save_path, adapter, with_head=True)
        # Perform evaluation with test set when we have a new best model
        print("Evaluating model on the test set...")
        self.testing = True
        try:
            # The "test" prefix keeps these metrics distinguishable from the
            # validation metrics reported under "eval".
            eval_result = trainer.evaluate(self.test_dataset, metric_key_prefix="test")
        finally:
            # Always clear the flag, even if the evaluation raises.
            self.testing = False
        print(f"scores on test set for best model: {eval_result}")

# Define the adapter arguments
adapterArgs = adapters.training.AdapterArguments(
  train_adapter=True,
)

# Setup the adapter training (this will add the adapter to the model)
print("Setting up adapter training...")
adapters.training.setup_adapter_training(
  model=model,
  adapter_args=adapterArgs,
  adapter_name=adapter,
)

# Single place for the output location used by the trainer and the final save.
# Note: the backslashes are part of the string as written; on a POSIX file
# system they are literal characters of one directory name, not separators.
output_dir = "DutchAdapter\\NLU prompt 4 babylm\\FineTune NLI"

# Define the training arguments for finetuning. Default training arguments from the adapter library are used
trainingArgs = TrainingArguments(
  learning_rate=1e-4,
  num_train_epochs=20,
  output_dir=output_dir,
  evaluation_strategy="epoch",
  save_strategy="no"  # Disable automatic saving, we will handle it in the callback
)

trainer = adapters.AdapterTrainer(
        model=model,
        args=trainingArgs,
        train_dataset=train_dataset,
        eval_dataset=validation_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
        callbacks=[SaveBestModelCallback(test_dataset)]
    )

print("Training the model...")
trainer.train()

# Save the final model
print("Saving the final model...")
model.save_adapter(f"{output_dir}\\final_model", adapter, with_head=True)

# Evaluate the final model
print("Evaluating the model...")
# Report the test metrics under the "test" prefix. With the default "eval"
# prefix the best-model callback sees an 'eval_f1' value, treats this test-set
# score as a validation score and can overwrite the saved best model with the
# final-epoch model.
trainer.evaluate(test_dataset, metric_key_prefix="test")


Loading model and tokenizer...




Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']

You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading adapter...

Adding new classification head...

Loading datasets...

Tokenizing datasets...

Setting up adapter training...

Training the model...



dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)



Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7387,0.65263,0.765657,0.768536,0.778379,0.765657
2,0.5288,0.586183,0.777778,0.764318,0.796738,0.777778
3,0.4719,0.565858,0.79798,0.792773,0.801667,0.79798
4,0.428,0.605459,0.793939,0.795181,0.799421,0.793939
5,0.4174,0.712645,0.787879,0.781033,0.792488,0.787879
6,0.3742,0.720113,0.781818,0.778311,0.78252,0.781818
7,0.3433,0.662482,0.808081,0.806699,0.807054,0.808081
8,0.3256,0.777551,0.80202,0.799951,0.800911,0.80202
9,0.2881,0.966474,0.787879,0.785589,0.786512,0.787879
10,0.2601,0.884112,0.783838,0.783084,0.782935,0.783838


New best F1 score: 0.768536008159265. Saving model...

Evaluating model on the test set...

scores on test set for best model: {'eval_loss': 0.6267037391662598, 'eval_accuracy': 0.763962494904199, 'eval_f1': 0.7666767249798707, 'eval_precision': 0.7749034155172728, 'eval_recall': 0.763962494904199}

New best F1 score: 0.7927726790814368. Saving model...

Evaluating model on the test set...

scores on test set for best model: {'eval_loss': 0.5124924778938293, 'eval_accuracy': 0.8030982470444353, 'eval_f1': 0.7997645135800788, 'eval_precision': 0.8037457999686906, 'eval_recall': 0.8030982470444353}

New best F1 score: 0.795181159995559. Saving model...

Evaluating model on the test set...

scores on test set for best model: {'eval_loss': 0.5370152592658997, 'eval_accuracy': 0.8116591928251121, 'eval_f1': 0.8134323047458849, 'eval_precision': 0.8187264170998542, 'eval_recall': 0.8116591928251121}

New best F1 score: 0.80669857392439. Saving model...

Evaluating model on the test set...

s

New best F1 score: 0.8200014108949054. Saving model...

Evaluating model on the test set...

scores on test set for best model: {'eval_loss': 1.0692464113235474, 'eval_accuracy': 0.8196086424785977, 'eval_f1': 0.8200014108949054, 'eval_precision': 0.8206066253355287, 'eval_recall': 0.8196086424785977, 'eval_runtime': 19.2327, 'eval_samples_per_second': 255.087, 'eval_steps_per_second': 31.925, 'epoch': 20.0}


{'eval_loss': 1.0692464113235474,
 'eval_accuracy': 0.8196086424785977,
 'eval_f1': 0.8200014108949054,
 'eval_precision': 0.8206066253355287,
 'eval_recall': 0.8196086424785977,
 'eval_runtime': 18.1661,
 'eval_samples_per_second': 270.063,
 'eval_steps_per_second': 33.799,
 'epoch': 20.0}

# Save the best model as a zip archive so we can download it from the notebook

In [None]:
# Archive the best adapter for download. The directory name contains literal
# backslashes: the training cell saved to a Windows-style path, which this
# runtime stored as a single folder name (see the archive entries listed below).
!zip -r bestModel.zip "/content/DutchAdapter\NLU prompt 4 babylm\FineTune NLI\best_model"

updating: content/DutchAdapter\NLU prompt 4 babylm\FineTune NLI\best_model/ (stored 0%)

updating: content/DutchAdapter\NLU prompt 4 babylm\FineTune NLI\best_model/pytorch_adapter.bin (deflated 7%)

updating: content/DutchAdapter\NLU prompt 4 babylm\FineTune NLI\best_model/adapter_config.json (deflated 60%)
