[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1zzmQJCQOKljRdTMu7vyXZ7q5Iomt5uN9?usp=sharing)

In [None]:
!pip install -U adapter-transformers
!pip install -U datasets

# model and tokenizer

In [None]:
from transformers import AutoConfig, AutoModelForQuestionAnswering, AutoTokenizer
import torch
# Base checkpoint shared by the config, model and tokenizer loaded below.
model_name = "xlm-roberta-base"
config = AutoConfig.from_pretrained(model_name)
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the checkpoint through the question-answering auto class and move it to `device`.
model = AutoModelForQuestionAnswering.from_pretrained(model_name, config=config).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Report how many parameters are currently flagged as trainable (requires_grad).
print("Without adapter/prefix parameters to train:")
print(sum(weight.numel() for weight in filter(lambda w: w.requires_grad, model.parameters())))

Without adapter parameters to train:
277454594


# data processing

In [None]:
from datasets import load_dataset
import datasets

# Load SQuAD, then replace each split by a fixed-seed shuffled subset
# (first 50000 shuffled training rows, first 6250 shuffled validation rows).
dataset = load_dataset("squad")
dataset["train"] = dataset["train"].shuffle(seed=42).select(range(50000))
dataset["validation"] = dataset["validation"].shuffle(seed=42).select(range(6250))

In [None]:
# Passed to the tokenizer as `max_length` in tokenize_sample_data.
max_length = 384
# Passed to the tokenizer as `stride` in tokenize_sample_data.
doc_stride = 128

def tokenize_sample_data(data):
  """
  Tokenize a batch of question/context pairs and label every resulting feature
  with the token positions of its answer.

  The context is truncated to `max_length` tokens with overflowing windows that
  overlap by `doc_stride`, so one example may produce several features.

  Args:
    data: batch (dict of lists) with the keys "question", "context" and
      "answers"; each answers entry holds the lists "answer_start" and "text".

  Returns:
    The tokenizer output (without "offset_mapping" and
    "overflow_to_sample_mapping") extended with "start_positions" and
    "end_positions". Features with no answer, or whose window does not fully
    contain the answer, are labelled with the CLS token index for both.
  """
  tokenized_feature = tokenizer(
    data["question"],
    data["context"],
    max_length=max_length,
    return_overflowing_tokens=True,
    stride=doc_stride,
    truncation="only_second",
    padding="max_length",
    return_offsets_mapping=True,
  )

  # Maps every feature back to the example it was cut from.
  sample_mapping = tokenized_feature.pop("overflow_to_sample_mapping")
  # Character span (start, end) of every token, per feature.
  offset_mapping = tokenized_feature.pop("offset_mapping")

  tokenized_feature["start_positions"] = []
  tokenized_feature["end_positions"] = []
  for i, offsets in enumerate(offset_mapping):
    sample_index = sample_mapping[i]
    answers = data["answers"][sample_index]

    input_ids = tokenized_feature["input_ids"][i]
    cls_index = input_ids.index(tokenizer.cls_token_id)

    # sequence_ids marks which tokens belong to the context (value 1).
    sequence_ids = tokenized_feature.sequence_ids(i)

    if len(answers["answer_start"]) == 0:
      # No answer available: point both labels at the CLS token.
      tokenized_feature["start_positions"].append(cls_index)
      tokenized_feature["end_positions"].append(cls_index)
      continue

    # Only the first annotated answer is used for training labels.
    start_char = answers["answer_start"][0]
    end_char = start_char + len(answers["text"][0])

    # First and last token index of the context inside this feature.
    context_start = 0
    while sequence_ids[context_start] != 1:
      context_start += 1
    context_end = len(input_ids) - 1
    while sequence_ids[context_end] != 1:
      context_end -= 1

    answer_in_window = (
      offsets[context_start][0] <= start_char
      and offsets[context_end][1] >= end_char
    )
    if not answer_in_window:
      tokenized_feature["start_positions"].append(cls_index)
      tokenized_feature["end_positions"].append(cls_index)
      continue

    # Both scans are bounded by the context span. Without the bounds, an answer
    # touching the edge of the window lets the scan run into the special or
    # padding tokens around the context and produces a position outside it.
    token_start = context_start
    while token_start <= context_end and offsets[token_start][0] <= start_char:
      token_start += 1
    tokenized_feature["start_positions"].append(token_start - 1)

    token_end = context_end
    while token_end >= context_start and offsets[token_end][1] >= end_char:
      token_end -= 1
    tokenized_feature["end_positions"].append(token_end + 1)
  return tokenized_feature

# Run tokenize_sample_data over the dataset in batches of 128 examples and drop
# the original columns (named after the train split), keeping only what the
# function returns.
tokenized_ds = dataset.map(
  tokenize_sample_data,
  remove_columns=dataset["train"].column_names,
  batched=True,
  batch_size=128)

# set adapters/prefix

## adapters

In [None]:
# language adapter
from transformers import AdapterConfig
from transformers.adapters.composition import Stack
# Load the "en/wiki@ukp" language adapter with a "pfeiffer" config (reduction factor 2).
lang_adapter_config = AdapterConfig.load("pfeiffer", reduction_factor=2)
model.load_adapter("en/wiki@ukp", config=lang_adapter_config)

# Add a fresh task adapter and select it as the adapter to train.
adap_name = "qa_squad"
model.add_adapter(adap_name)
model.train_adapter(adap_name)

# Activate the language adapter ("en") and the task adapter together as a Stack
# composition rather than activating the task adapter on its own.
model.active_adapters = Stack("en",adap_name)

In [None]:
# Report how many parameters are flagged as trainable after the adapter setup.
print("With adapter parameters to train:")
print(sum(weight.numel() for weight in filter(lambda w: w.requires_grad, model.parameters())))

With adapter parameters to train:
896066


## prefix

In [None]:
from transformers.adapters import PrefixTuningConfig

# NOTE(review): this rebinds `config`, which earlier in the notebook held the
# AutoConfig of the base model.
config = PrefixTuningConfig(flat=False, prefix_length=30)
model.add_adapter("prefix_tuning", config=config)

# Select the prefix-tuning module for training and make it the active adapter setup.
model.train_adapter("prefix_tuning")
model.active_adapters = "prefix_tuning"

In [None]:
# Report how many parameters are flagged as trainable after the prefix-tuning setup.
print("With prefix parameters to train:")
print(sum(weight.numel() for weight in filter(lambda w: w.requires_grad, model.parameters())))

# training

In [None]:
from transformers import TrainingArguments,AdapterTrainer,Trainer
from transformers import DefaultDataCollator

# Features are already padded to max_length by the tokenizer call, so the
# default collator is used to batch them.
data_collator = DefaultDataCollator()

# Shared by the training and evaluation batch sizes below.
batch_size = 16

# Hyper-parameters for the run: 3 epochs, linear schedule with 100 warmup steps,
# evaluation every 1000 steps, checkpoint every 5000 steps, logging every 100 steps.
training_args = TrainingArguments(
  output_dir = "./training_qa",
  log_level = "error",
  num_train_epochs = 3,
  learning_rate = 7e-5,
  lr_scheduler_type = "linear",
  warmup_steps = 100,
  per_device_train_batch_size = batch_size,
  per_device_eval_batch_size = batch_size,
  evaluation_strategy = "steps",
  eval_steps = 1000, 
  save_steps = 5000, 
  logging_steps = 100,
  push_to_hub = False
)


# AdapterTrainer is used for the adapter / prefix-tuning runs;
# swap in ``Trainer`` for full fine-tuning.
trainer = AdapterTrainer(
  model = model,
  args = training_args,
  data_collator = data_collator,
  train_dataset = tokenized_ds["train"],
  eval_dataset = tokenized_ds["validation"],
  tokenizer = tokenizer,
)



In [None]:
trainer.train()

In [None]:
# save lightweight tuned model
# `os` is imported here because this cell runs before any other cell imports it;
# without it a fresh Restart & Run All fails with NameError.
import os

save_path = "./QA_adapter" #or "./QA_prefix"
os.makedirs(save_path, exist_ok=True)
trainer.save_model(save_path)

In [None]:
# save full fine-tuned model
# `os` is imported here because this cell runs before any other cell imports it;
# without it a fresh Restart & Run All fails with NameError.
import os

save_path = "./QA_finetune"
os.makedirs(save_path, exist_ok=True)
trainer.model.save_pretrained(save_path)

# evaluation

## load trained model

In [None]:
# full fine-tune
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers import pipeline
import torch
from datasets import load_dataset

# The tokenizer comes from the base checkpoint, the weights from the fine-tuned directory.
model_name = "xlm-roberta-base"
model_path = "./QA_finetune"
tokenizer = AutoTokenizer.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForQuestionAnswering.from_pretrained(model_path).to(device)
# Pipeline device index: GPU 0 when CUDA is available, -1 (CPU) otherwise, so
# the cell no longer fails on machines without a GPU.
qa_ppl = pipeline(
  "question-answering",
  model=model,
  tokenizer=tokenizer,
  device=0 if torch.cuda.is_available() else -1,
)

In [None]:
# adapters
from transformers import AutoTokenizer, AutoModelForQuestionAnswering,AdapterConfig
from transformers import pipeline
import torch
from datasets import load_dataset
from transformers.adapters.composition import Stack

# The tokenizer is read from the adapter output directory; the model weights
# come from the base checkpoint.
model_name = "xlm-roberta-base"
model_path = "./QA_adapter"
tokenizer = AutoTokenizer.from_pretrained(model_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForQuestionAnswering.from_pretrained(model_name).to(device)

# Load the task adapter saved under "<model_path>/qa_squad".
model.load_adapter(f"{model_path}/qa_squad")

# Load the "en/wiki@ukp" language adapter again and activate both adapters as
# the same Stack("en", "qa_squad") composition used in the training section.
lang_adapter_config = AdapterConfig.load("pfeiffer", reduction_factor=2)
model.load_adapter("en/wiki@ukp", config=lang_adapter_config)
model.active_adapters = Stack("en","qa_squad")

In [None]:
# prefix
from transformers import AutoConfig, AutoModelForQuestionAnswering, AutoTokenizer
import torch
# Start from the base checkpoint; the prefix-tuning weights are attached below.
model_name = "xlm-roberta-base"
config = AutoConfig.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForQuestionAnswering.from_pretrained(model_name, config=config).to(device)

# The tokenizer is read from the prefix-tuning output directory.
model_path = "./QA_prefix"
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the module saved under "<model_path>/prefix_tuning" and make it active.
model.load_adapter(f"{model_path}/prefix_tuning")
model.active_adapters = "prefix_tuning"

## output result

In [None]:
import string, re, os
import pandas as pd


def normaliza_text(text):
  """
  Lower-case the text, delete ASCII punctuation, blank out the English
  articles "a", "an" and "the", and collapse all whitespace runs to one space.
  """
  lowered = text.lower()
  # delete every character listed in string.punctuation
  no_punct = lowered.translate(str.maketrans("", "", string.punctuation))
  # replace standalone articles by a space (word boundaries keep "theory" intact)
  no_articles = re.sub(r"\b(a|an|the)\b", " ", no_punct, flags=re.UNICODE)
  # split() without arguments splits on any whitespace run; re-joining normalizes spacing
  return " ".join(no_articles.split())

def metric_exact_match(prediction,label):
  """
  Return 1 when prediction and label are equal after normalization, else 0.
  If either string is empty, the raw strings are compared instead.
  """
  if len(prediction) > 0 and len(label) > 0:
    return int(normaliza_text(prediction) == normaliza_text(label))
  # at least one side is empty: only an exact raw match counts
  return int(prediction == label)

def metric_f1(prediction,label):
  """
  Token-level F1 between a predicted answer and one gold answer.

  Both strings are normalized with normaliza_text and split on whitespace.
  The overlap is counted as a multiset intersection, so repeated tokens count
  once per matching occurrence. A plain set intersection would under-count
  them: "cat cat" against "cat cat" would score 0.5 instead of 1.0.

  Returns:
    1 or 0 when either side has no tokens left (1 only if both are empty),
    0 when there is no overlap, otherwise the harmonic mean of precision and
    recall as a float.
  """
  from collections import Counter

  pred_tokens = normaliza_text(prediction).split()
  label_tokens = normaliza_text(label).split()

  #if either prediction or label is no-answer, then f1=1 if they match, 0 otherwise
  if len(pred_tokens) == 0 or len(label_tokens) == 0:
    return int(pred_tokens==label_tokens)

  # Counter & Counter keeps, per token, the minimum of the two counts
  num_common = sum((Counter(pred_tokens) & Counter(label_tokens)).values())
  if num_common == 0:
    return 0

  precision = num_common / len(pred_tokens)
  recall = num_common / len(label_tokens)
  f1 = (2*precision*recall) / (precision+recall)
  return f1

def metric_max_over_label(metric_type, prediction, labels):
  """
  Score the prediction against every gold label with `metric_type` and
  return the best score.
  """
  return max(metric_type(prediction, gold) for gold in labels)

def evaluate(gold_labels, predictions):
  """
  Compute exact-match and F1 percentages over a set of predictions.

  Args:
    gold_labels: one list of acceptable answer strings per example.
    predictions: one predicted answer string per example, in the same order.

  Returns:
    (exact_match, f1) as percentages rounded to two decimals. Returns
    (0.0, 0.0) when there is nothing to evaluate.
  """
  f1 = e_m = total = 0
  for prediction,labels in zip(predictions,gold_labels):
    total += 1
    e_m += metric_max_over_label(metric_exact_match,prediction,labels)
    f1 += metric_max_over_label(metric_f1,prediction,labels)

  # guard against division by zero on empty input
  if total == 0:
    return 0.0, 0.0

  # round after scaling to a percentage; scaling an already rounded ratio
  # reintroduces floating-point noise (e.g. 57.13999999999999)
  e_m = round(100.0 * e_m / total, 2)
  f1 = round(100.0 * f1 / total, 2)

  return e_m, f1

def output_predictions_results(pipeline, language, path_predictions): 
    """
    Run a question-answering pipeline over the XQuAD validation split of one
    language and score the predictions.

    Args:
        pipeline: callable taking {"context", "question"} and returning a dict
            with the keys "answer" and "score".
        language: XQuAD language code, used to build the config name
            (e.g. "en" -> "xquad.en").
        path_predictions: optional file path; when truthy, one tab-separated
            line (context, question, gold answers, prediction, score) is
            appended per example for qualitative analysis.

    Returns:
        The (exact_match, f1) tuple produced by evaluate.
    """
    dataset_test = load_dataset("xquad",f"xquad.{language}") #e.g. "xquad.en"
    contexts, questions, gold_answers = [], [], []
    for piece in dataset_test["validation"]:
        contexts.append(piece["context"])
        questions.append(piece["question"])
        gold_answers.append(piece["answers"]["text"]) #xquad has only one answer

    predictions = []
    # Open the output file once instead of once per example, with an explicit
    # UTF-8 encoding so non-Latin text is written regardless of the platform default.
    out_file = open(path_predictions, "a", encoding="utf-8") if path_predictions else None
    try:
        for (c,q,a) in zip(contexts, questions, gold_answers):
            qa_input = {"context":c, "question": q}
            prediction = pipeline(qa_input,align_to_words=True)
            score = prediction["score"]
            answer = prediction["answer"]
            predictions.append(answer)

            if out_file is not None:    # qualitative analysis
                out_file.write(f"{c}\t{q}\t{a}\t{answer}\t{score}\n")
    finally:
        if out_file is not None:
            out_file.close()

    return evaluate(gold_answers,predictions)

In [None]:
from transformers import pipeline
# Label written to the results file; set it to match the model loaded above.
tuning_method = "finetune" #or "adapter"/"prefix"

# Pipeline device index: GPU 0 when CUDA is available, -1 (CPU) otherwise, so
# the cell no longer fails on machines without a GPU.
qa_ppl = pipeline(
  "question-answering",
  model=model,
  tokenizer=tokenizer,
  device=0 if torch.cuda.is_available() else -1,
)
l_list = ["en","ar","de","th","tr","zh"]
for language in l_list:
  em, f1 = output_predictions_results(qa_ppl,language,path_predictions=None)

  # Appended columns: tuning method, language, EM, F1 (tab-separated, no header row).
  with open("./results_QA.txt","a") as f:
    f.write(f"{tuning_method}\t{language}\t{em}\t{f1}\n")