<a href="https://colab.research.google.com/github/HimashiRathnayake/CMCS-MTL/blob/main/Train_Adapter_Fusion_Multi_task_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Combining Pre-Trained Adapters using AdapterFusion for Code-Mixed and Code-Switched Data

## Parameters

In [None]:
target_task = "hate speech" #@param ["sentiment", "humor", "hate speech", "languageID"]
unfreeze_model = False #@param {type:"boolean"}
oversample_dataset = True #@param {type:"boolean"}
over_sampling_technique = "ROS" #@param ["", "ROS","ADASYN", "SMOTE", "BorderlineSMOTE"]
sampling_strategy = "1:0.25:0.25" #@param [] {allow-input: true}
lang_adapter_setting = "fusion" #@param ["none", "stack", "parallel", "fusion"]
random_state = 42 #@param
learning_rate =  5e-5  #1e-4

## Installation

In [None]:
!pip install -U adapter-transformers
!pip install datasets
!pip install sentencepiece

Collecting adapter-transformers
  Downloading adapter_transformers-3.0.1-py3-none-any.whl (3.9 MB)
[K     |████████████████████████████████| 3.9 MB 4.9 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 58.9 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.6.0-py3-none-any.whl (84 kB)
[K     |████████████████████████████████| 84 kB 3.8 MB/s 
[?25hCollecting tokenizers!=0.11.3,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 43.7 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.53.tar.gz (880 kB)
[K     |████████████████████████████████| 880 kB 62.7 MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Cr

## Dependencies

In [None]:
# from datasets import load_dataset
from google.colab import drive
from sklearn.model_selection import train_test_split
from transformers import XLMRobertaTokenizer, TrainingArguments, Trainer, EvalPrediction, XLMRobertaConfig, XLMRobertaModelWithHeads, TextClassificationPipeline, AdapterConfig, EarlyStoppingCallback
from transformers.adapters.composition import Fuse, Stack, Parallel
from datasets import load_metric
import numpy as np
import pandas as pd
import torch
from collections import Counter
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN, BorderlineSMOTE

In [None]:
drive.mount('/content/drive')

Mounted at /content/drive


## Dataset Preprocessing

In [None]:
def apply_oversampling(x, y):

  (unique, counts) = np.unique(y, axis=0, return_counts=True)
  print("Class Distribution Without Oversampling", counts)

  # define oversampling strategy
  if (over_sampling_technique == ""):
    return x, y
  elif (over_sampling_technique == "ROS"):
    if (target_task=="humor"):
      oversample = RandomOverSampler(sampling_strategy = float(sampling_strategy))
    else:
      sampling_ratio = sampling_strategy.split(":");
      oversample = RandomOverSampler(sampling_strategy = {
          0:int(counts[0]*float(sampling_ratio[0])), 
          1:int(counts[0]*float(sampling_ratio[1])), 
          2:int(counts[0]*float(sampling_ratio[2]))
          })
  elif (over_sampling_technique == "ADASYN"):
    oversample = ADASYN(sampling_strategy="minority")
  elif (over_sampling_technique == "SMOTE"):
    oversample = SMOTE()
  elif (over_sampling_technique == "BorderlineSMOTE"):
    oversample = BorderlineSMOTE()

  # fit and apply the transform
  X_over, y_over = oversample.fit_resample(x, y)

  (unique, counts) = np.unique(y_over, axis=0, return_counts=True)
  print("Class Distribution After Oversampling", counts)

  return X_over, y_over

In [None]:
dataset_path = "/content/drive/Shareddrives/FYP/corpus/çompleted_draft.csv"

all_data = pd.read_csv(dataset_path)

if (target_task == "humor"):
  all_data = all_data[['Sentence', 'Humor']]
elif (target_task == "hate speech"):
  all_data = all_data[['Sentence', 'Hate_speech']]
elif (target_task == "sentiment"):
  all_data = all_data[['Sentence', 'Sentiment']]
else:
  all_data = all_data[['Sentence', 'LangiageID']]

all_data.columns = ['Sentence', 'Label']
id2label = {id_: label for id_, label in enumerate(pd.unique(all_data['Label']))}
all_data['Label'], uniq = pd.factorize(all_data['Label'])

X = all_data['Sentence'].values.tolist()
y = all_data['Label'].values.tolist()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state = random_state)

In [None]:
if oversample_dataset:
  X_train = np.array(X_train).reshape(-1, 1)
  X_train, y_train = apply_oversampling(X_train, y_train)
  X_train = [x[0] for x in X_train.tolist()]

Class Distribution Without Oversampling [11036   314   816]
Class Distribution After Oversampling [11036  2759  2759]


In [None]:
tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base", do_lower_case=True)

Downloading:   0%|          | 0.00/4.83M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/615 [00:00<?, ?B/s]

In [None]:
MAX_LEN = 128

def encode_batch(batch):
  """Encodes a batch of input data using the model tokenizer."""
  return tokenizer(batch, max_length=MAX_LEN, truncation=True, padding="max_length")

In [None]:
# Encode the input data
encoded_X_train = encode_batch(X_train)
encoded_X_test = encode_batch(X_test)

In [None]:
class DatasetObject(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = DatasetObject(encoded_X_train, y_train)
test_dataset = DatasetObject(encoded_X_test, y_test)

## Fusion Training

In [None]:
config = XLMRobertaConfig.from_pretrained(
    "xlm-roberta-base",
    id2label=id2label,
)

model = XLMRobertaModelWithHeads.from_pretrained(
    "xlm-roberta-base",
    config=config,
)



Downloading:   0%|          | 0.00/1.04G [00:00<?, ?B/s]

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModelWithHeads: ['lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaModelWithHeads were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for pr

In [None]:
# # # Load the language adapters
# # lang_adapter_config = AdapterConfig.load("pfeiffer", reduction_factor=2)
if lang_adapter_setting != "none":
  lang_adapter_config = AdapterConfig.load("pfeiffer+inv")
  model.load_adapter("/content/drive/Shareddrives/FYP/TrainedAdapters/mlm", config=lang_adapter_config, load_as="si-en", with_head=False)
  model.load_adapter("/content/drive/Shareddrives/FYP/TrainedAdapters/si_mlm", config=lang_adapter_config, load_as="si", with_head=False)
  config = AdapterConfig.load("pfeiffer", non_linearity="relu", reduction_factor=2)
  model.load_adapter("en/wiki@ukp", config=config)

Downloading:   0%|          | 0.00/538 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.2M [00:00<?, ?B/s]

In [None]:
# Load the pre-trained adapters we want to fuse
model.load_adapter("/content/drive/Shareddrives/FYP/Implementation/TrainedAdapters/Final/hate", load_as="hate", with_head=False)
model.load_adapter("/content/drive/Shareddrives/FYP/Implementation/TrainedAdapters/Final/humor", load_as="humor", with_head=False)
model.load_adapter("/content/drive/Shareddrives/FYP/Implementation/TrainedAdapters/Final/sentiment", load_as="sentiment", with_head=False)
model.load_adapter("/content/drive/Shareddrives/FYP/Implementation/TrainedAdapters/Final/language_id", load_as="languageID", with_head=False)

'languageID'

In [None]:
# Set up adapters
# lang_adapter_setup = Fuse("si-en", "si", "en")
adapter_setup = Fuse("hate", "humor", "sentiment", "languageID")
# model.add_adapter_fusion(lang_adapter_setup)
model.add_adapter_fusion(adapter_setup)

if lang_adapter_setting == "none":
  print("none")
  model.set_active_adapters(adapter_setup)
elif lang_adapter_setting == "stack":
  print("stack")
  model.set_active_adapters(Stack("en", "si", "si-en", adapter_setup))
elif lang_adapter_setting == "parallel":
  print("parallel")
  model.set_active_adapters(Stack(Parallel("en", "si", "si-en"), adapter_setup))
elif lang_adapter_setting == "fusion":
  print("fusion")
  lang_adapter_setup = Fuse("en", "si", "si-en")
  model.add_adapter_fusion(lang_adapter_setup)
  model.set_active_adapters(Stack(lang_adapter_setup, adapter_setup))

# Add a classification head for target task
model.add_classification_head('classifier', num_labels=len(id2label))

# # Unfreeze and activate fusion setup
if lang_adapter_setting == "fusion":
  model.train_adapter_fusion(lang_adapter_setup, adapter_setup)
else:
  model.train_adapter_fusion(adapter_setup)

fusion


In [None]:
if unfreeze_model:
  # Unfreeze the model to train both the model and adapter
  model.freeze_model(False)

In [None]:
training_args = TrainingArguments(
    learning_rate = learning_rate,
    num_train_epochs = 6,
    per_device_train_batch_size = 32,
    per_device_eval_batch_size = 32,
    # logging_steps = 200,
    output_dir = "./training_output",
    overwrite_output_dir = True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns = False,
    metric_for_best_model="eval_macro_f1",
    load_best_model_at_end=True,
    save_strategy="epoch",
    evaluation_strategy="epoch"
)

In [None]:
def compute_metrics(eval_pred):
    metric1 = load_metric("precision")
    metric2 = load_metric("recall")
    metric3 = load_metric("f1")
    metric4 = load_metric("accuracy")
    
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    precision = metric1.compute(predictions=predictions, references=labels, average="weighted")["precision"]
    recall = metric2.compute(predictions=predictions, references=labels, average="weighted")["recall"]
    f1 = metric3.compute(predictions=predictions, references=labels, average="weighted")["f1"]
    accuracy = metric4.compute(predictions=predictions, references=labels)["accuracy"]
    macro_precision = metric1.compute(predictions=predictions, references=labels, average="macro")["precision"]
    macro_recall = metric2.compute(predictions=predictions, references=labels, average="macro")["recall"]
    macro_f1 = metric3.compute(predictions=predictions, references=labels, average="macro")["f1"]
    return {"accuracy":accuracy, "precision": precision, "recall": recall, "f1": f1, "macro_precision": macro_precision, "macro_recall": macro_recall, "macro_f1": macro_f1}

In [None]:
trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = train_dataset,
    eval_dataset = test_dataset,
    compute_metrics = compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

In [None]:
trainer.train()

***** Running training *****
  Num examples = 16554
  Num Epochs = 6
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3108


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Macro Precision,Macro Recall,Macro F1
1,0.7332,0.268954,0.903107,0.897744,0.903107,0.900229,0.578689,0.54459,0.559518
2,0.4652,0.311869,0.881657,0.924237,0.881657,0.897463,0.583406,0.773522,0.645413
3,0.3049,0.251056,0.91568,0.927539,0.91568,0.920624,0.654183,0.750208,0.694212
4,0.2062,0.27247,0.929734,0.930778,0.929734,0.929878,0.721189,0.695352,0.704398
5,0.164,0.278396,0.924556,0.932876,0.924556,0.928111,0.679105,0.753993,0.71206
6,0.13,0.283268,0.926775,0.934404,0.926775,0.930017,0.693827,0.767692,0.726636


***** Running Evaluation *****
  Num examples = 1352
  Batch size = 32


Downloading builder script:   0%|          | 0.00/2.58k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.52k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Saving model checkpoint to ./training_output/checkpoint-518
Configuration saved in ./training_output/checkpoint-518/config.json
Model weights saved in ./training_output/checkpoint-518/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1352
  Batch size = 32
Saving model checkpoint to ./training_output/checkpoint-1036
Configuration saved in ./training_output/checkpoint-1036/config.json
Model weights saved in ./training_output/checkpoint-1036/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1352
  Batch size = 32
Saving model checkpoint to ./training_output/checkpoint-1554
Configuration saved in ./training_output/checkpoint-1554/config.json
Model weights saved in ./training_output/checkpoint-1554/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1352
  Batch size = 32
Saving model checkpoint to ./training_output/checkpoint-2072
Configuration saved in ./training_output/checkpoint-2072/config.json
Model weights saved in ./training_output/chec

TrainOutput(global_step=3108, training_loss=0.3265785468284381, metrics={'train_runtime': 2968.8042, 'train_samples_per_second': 33.456, 'train_steps_per_second': 1.047, 'total_flos': 1.0928844125494272e+16, 'train_loss': 0.3265785468284381, 'epoch': 6.0})

## Validation after stacking a language adapter

In [None]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 1352
  Batch size = 32


{'epoch': 6.0,
 'eval_accuracy': 0.9267751479289941,
 'eval_f1': 0.930017013825134,
 'eval_loss': 0.2832675874233246,
 'eval_macro_f1': 0.7266359159313424,
 'eval_macro_precision': 0.6938268975298839,
 'eval_macro_recall': 0.7676917706581888,
 'eval_precision': 0.9344037971642575,
 'eval_recall': 0.9267751479289941,
 'eval_runtime': 17.3749,
 'eval_samples_per_second': 77.813,
 'eval_steps_per_second': 2.475}

## Save Trained Fusion

In [None]:
# model.save_adapter_fusion("/content/drive/Shareddrives/FYP/final_adapter/fusion", "hate,humor,sentiment,languageID")

## Get Predictions

In [None]:
# def predict(premise, hypothesis):
#   encoded = tokenizer(premise, hypothesis, return_tensors="pt")
#   if torch.cuda.is_available():
#     encoded.to("cuda")
#   logits = model(**encoded)[0]
#   pred_class = torch.argmax(logits).item()
#   return id2label[pred_class]

# predict("Lol 😅")

# classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer, device=training_args.device.index)
# classifier("😅")

In [None]:
# model.save_adapter_fusion("./saved", "multinli, qqp, qnli")
# model.save_all_adapters("./saved")

# !ls -l saved