[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eopmApOqgU8AFJ0--5QmMnBqf-u29QGA?usp=sharing)

In [None]:
!pip install -U adapter-transformers
!pip install -U datasets
!pip install sentencepiece

# model and tokenizer

In [None]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Checkpoint shared by the tokenizer and the classifier.
model_name = "xlm-roberta-base"

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sequence classifier with a 3-way output, moved onto `device`.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
)
model = model.to(device)

# data processing

In [None]:
from datasets import load_dataset

# GLUE/MNLI: the "train" split is used for training and the
# "validation_mismatched" split for evaluation during training.
dataset_train = load_dataset(
    "glue",
    "mnli",
    split="train",
)
dataset_valid = load_dataset(
    "glue",
    "mnli",
    split="validation_mismatched",
)

In [None]:
def tokenize_function(examples):
  """Tokenize a batch of premise/hypothesis pairs.

  Every pair is encoded together, truncated and padded to exactly 128 tokens.

  Args:
    examples: Batch mapping with "premise", "hypothesis" and "label" entries.

  Returns:
    Dict with "input_ids" and "attention_mask" from the tokenizer plus
    "labels", which is the batch's "label" entry passed through unchanged.
  """
  pair_encoding = tokenizer(
      examples["premise"],
      examples["hypothesis"],
      truncation=True,
      max_length=128,
      padding="max_length",
  )
  features = {key: pair_encoding[key] for key in ("input_ids", "attention_mask")}
  features["labels"] = examples["label"]
  return features

def _tokenize_split(split):
  """Run `tokenize_function` over `split` in batches of 128, dropping its original columns."""
  return split.map(
      tokenize_function,
      batched=True,
      batch_size=128,
      remove_columns=split.column_names,
  )


tokenized_valid = _tokenize_split(dataset_valid)
tokenized_train = _tokenize_split(dataset_train)

# set adapters/prefix (run only ONE of the two subsections below; skip both for full fine-tuning)

## adapters

In [None]:
from transformers import AdapterConfig
from transformers.adapters.composition import Stack
# Load the "en/wiki@ukp" adapter using a "pfeiffer" config with
# reduction_factor=2. It is referred to as "en" in the Stack below.
lang_adapter_config = AdapterConfig.load("pfeiffer", reduction_factor=2)
model.load_adapter("en/wiki@ukp", config=lang_adapter_config)

# Add a new adapter for the MNLI task and select it as the one to train.
adap_name = "mnli"
model.add_adapter(adap_name)
model.train_adapter(adap_name)

# Activate both adapters as a stack: "en" first, then the task adapter.
# This is set after train_adapter() so that the stacked setup is the one in effect.
model.active_adapters = Stack("en",adap_name)

In [None]:
# Count only the parameters that currently require gradients.
trainable_param_count = sum(
    param.numel() for param in model.parameters() if param.requires_grad
)
print("With adapter parameters to train:")
print(trainable_param_count)

With adapter parameters to train:
1487427


## prefix

In [None]:
from transformers.adapters import PrefixTuningConfig

# Prefix-tuning configuration: prefix_length=20, flat=False.
config = PrefixTuningConfig(flat=False, prefix_length=20)
model.add_adapter("prefix_tuning", config=config)

# Select the prefix module for training and make it the active adapter.
model.train_adapter("prefix_tuning")
model.active_adapters = "prefix_tuning"

In [None]:
# Count only the parameters that currently require gradients.
trainable_param_count = sum(
    param.numel() for param in model.parameters() if param.requires_grad
)
print("With prefix parameters to train:")
print(trainable_param_count)

# training

In [None]:
from transformers import AdapterTrainer, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding

# Used for both the training and the evaluation batch size.
batch_size = 16

# Collates tokenized examples into padded batches.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding=True)

training_args = TrainingArguments(
    output_dir="./training_nli",
    log_level="error",
    # --- schedule ---
    num_train_epochs=1,
    learning_rate=3e-5,
    lr_scheduler_type="linear",
    warmup_ratio=0.06,
    # --- batching ---
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # --- Adam hyperparameters ---
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    # --- evaluation / checkpointing / logging (all in optimizer steps) ---
    evaluation_strategy="steps",
    eval_steps=5000,
    save_steps=8000,
    logging_steps=1000,
    save_total_limit=1,
)

# AdapterTrainer is used for adapter / prefix training;
# use ``Trainer`` here instead for full fine-tuning.
trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
    data_collator=data_collator,
    tokenizer=tokenizer,
)

In [None]:
# Run training with the trainer built in the previous cell.
trainer.train()

In [None]:
# save lightweight tuned model
# `os` is needed for makedirs below and is not imported elsewhere in the
# notebook, so import it here to keep the cell runnable on a fresh kernel.
import os

save_path = "./NLI_adapter" #or "./NLI_prefix"
os.makedirs(save_path, exist_ok=True)  # no error if the directory already exists
trainer.save_model(save_path)

In [None]:
# save full fine-tuned model
# `os` is needed for makedirs below and is not imported elsewhere in the
# notebook, so import it here to keep the cell runnable on a fresh kernel.
import os

save_path = "./NLI_finetune"
os.makedirs(save_path, exist_ok=True)  # no error if the directory already exists
trainer.model.save_pretrained(save_path)

# evaluation

## load trained model (run only the ONE cell below that matches the tuning method used)

In [None]:
# full fine-tuned model
import datasets
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "xlm-roberta-base"
model_path = "./NLI_finetune"

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The tokenizer comes from the base checkpoint; the classifier weights come
# from the directory written by the "save full fine-tuned model" cell.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model = model.to(device)

In [None]:
# adapter
import datasets
import torch
from datasets import load_dataset
from transformers import AdapterConfig, AutoModelForSequenceClassification, AutoTokenizer
from transformers.adapters.composition import Stack

model_name = "xlm-roberta-base"
model_path = "./NLI_adapter"

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The tokenizer is read from the save directory; the classifier starts from
# the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
)

# Trained task adapter, stored under "<model_path>/mnli".
model.load_adapter(f"{model_path}/mnli")

# Same "en/wiki@ukp" adapter and config as in the training setup.
lang_adapter_config = AdapterConfig.load("pfeiffer", reduction_factor=2)
model.load_adapter("en/wiki@ukp", config=lang_adapter_config)

# Activate both adapters as a stack, then move everything to `device`.
model.active_adapters = Stack("en", "mnli")
model.to(device)

In [None]:
# prefix
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import datasets

model_name = "xlm-roberta-base"
model_path = "./NLI_prefix"
# The tokenizer is read from the save directory; the classifier starts from
# the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The model is moved to `device` once, at the end of the cell, after the
# prefix-tuning weights are loaded (same order as the adapter-loading cell).
model = AutoModelForSequenceClassification.from_pretrained(model_name,num_labels=3)

# Trained prefix-tuning weights, stored under "<model_path>/prefix_tuning".
model.load_adapter(f"{model_path}/prefix_tuning")
model.active_adapters = "prefix_tuning"
model.to(device)

## output results

In [None]:
import datasets
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from torch.utils.data import TensorDataset, DataLoader

# --- settings to edit before running this cell ---
data_type = "cross_test" #or "mix_test"/"in_test"
tuning_method = "finetune" #or "adapter"/"prefix"; only used to tag the rows of the results file
language_list = ["en","ar","de","th","tr","zh"]

# Integer ids for the textual gold labels found in the CSV test files.
label_map = {"contradiction": 2, "neutral": 1, "entailment": 0}

# Evaluation mode is set once here; it does not depend on the language.
model.eval()

for language in language_list:
  # "mix_test" data is read with datasets.load_from_disk and already has
  # integer labels; the other test sets are CSV files with textual labels.
  if data_type == "mix_test":
    test_path = f"./NLI_test_data/{data_type}/{language}"
    test_dataset = datasets.load_from_disk(test_path)
    premises = test_dataset['premise']
    hypotheses = test_dataset['hypothesis']
    labels = test_dataset['label']
  else:
    test_dataset = pd.read_csv(f"./NLI_test_data/{data_type}/{language}.csv")
    premises = test_dataset['sentence1']
    hypotheses = test_dataset['sentence2']
    labels_text = test_dataset["gold_label"]
    labels = [label_map[label] for label in labels_text]

  # Encode the whole test set at once as (premise, hypothesis) pairs.
  encoded_dict = tokenizer.batch_encode_plus(
      list(zip(premises, hypotheses)),
      add_special_tokens=True,
      padding=True,
      truncation=True,
      return_attention_mask=True,
      return_tensors='pt'
  )

  eval_dataset = TensorDataset(
      encoded_dict['input_ids'],
      encoded_dict['attention_mask'],
      torch.tensor(labels),
  )
  eval_loader = DataLoader(eval_dataset, batch_size=8)

  predictions = []
  true_labels = []

  with torch.no_grad():
    for batch in eval_loader:
      # Batch-local names so the full-dataset variables above are not overwritten.
      batch_input_ids, batch_attention_mask, batch_labels = (t.to(device) for t in batch)

      outputs = model(batch_input_ids, attention_mask=batch_attention_mask)
      # outputs[0] holds the per-class scores; take the highest-scoring class.
      _, pred = torch.max(outputs[0], dim=1)

      predictions.extend(pred.cpu().numpy())
      true_labels.extend(batch_labels.cpu().numpy())

  # All metrics are reported as percentages rounded to two decimals.
  accuracy = round(accuracy_score(true_labels, predictions)*100,2)
  f1 = round(f1_score(true_labels, predictions, average="weighted")*100,2)
  mcc = round(matthews_corrcoef(true_labels, predictions)*100, 2)

  # Append one tab-separated row per language; earlier results are kept.
  with open("./results_NLI.txt", "a") as f:
    f.write(f"{tuning_method}\t{language}\t{f1}\t{accuracy}\t{mcc}\t{data_type}\n")
  print(f"F1: {f1}\nAcc: {accuracy}\nMcc: {mcc}")