<a href="https://colab.research.google.com/github/HimashiRathnayake/CMCS-Text-Classification/blob/main/XLM-R/Train_Adapter_Fusion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Combining Pre-Trained Adapters using AdapterFusion for Code-Mixed and Code-Switched Data

## Parameters

In [1]:
target_task = "humor" #@param ["sentiment", "humor", "hate speech", "languageID"]

## Installation

In [2]:
# !pip install -U adapter-transformers
# !pip install datasets
# !pip install sentencepiece

Collecting adapter-transformers
  Downloading adapter_transformers-2.2.0-py3-none-any.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 5.4 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 39.2 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 38.2 MB/s 
Collecting huggingface-hub>=0.0.17
  Downloading huggingface_hub-0.1.2-py3-none-any.whl (59 kB)
[K     |████████████████████████████████| 59 kB 6.9 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 10.9 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, adapter-t

## Dependencies

In [28]:
# from datasets import load_dataset
from google.colab import drive
from sklearn.model_selection import train_test_split
from transformers import XLMRobertaTokenizer, TrainingArguments, AdapterTrainer, EvalPrediction, XLMRobertaConfig, XLMRobertaModelWithHeads, TextClassificationPipeline
from transformers.adapters.composition import Fuse
from datasets import load_metric
import numpy as np
import pandas as pd
import torch

In [4]:
drive.mount('/content/drive')

Mounted at /content/drive


## Dataset Preprocessing

In [5]:
dataset_path = "/content/drive/Shareddrives/FYP/corpus/çompleted_draft.csv"

all_data = pd.read_csv(dataset_path)

if (target_task == "humor"):
  all_data = all_data[['Sentence', 'Humor']]
elif (target_task == "hate speech"):
  all_data = all_data[['Sentence', 'Hate_speech']]
elif (target_task == "sentiment"):
  all_data = all_data[['Sentence', 'Sentiment']]
else:
  all_data = all_data[['Sentence', 'LangiageID']]

all_data.columns = ['Sentence', 'Label']
id2label = {id_: label for id_, label in enumerate(pd.unique(all_data['Label']))}
all_data['Label'], uniq = pd.factorize(all_data['Label'])

X = all_data['Sentence'].values.tolist()
y = all_data['Label'].values.tolist()

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state = 42)

In [7]:
tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base", do_lower_case=True)

Downloading:   0%|          | 0.00/4.83M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/8.68M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/512 [00:00<?, ?B/s]

In [8]:
MAX_LEN = 128

def encode_batch(batch):
  """Encodes a batch of input data using the model tokenizer."""
  return tokenizer(batch, max_length=MAX_LEN, truncation=True, padding="max_length")

In [9]:
# Encode the input data
encoded_X_train = encode_batch(X_train)
encoded_X_test = encode_batch(X_test)

In [10]:
class DatasetObject(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = DatasetObject(encoded_X_train, y_train)
test_dataset = DatasetObject(encoded_X_test, y_test)

## Fusion Training

In [11]:
config = XLMRobertaConfig.from_pretrained(
    "xlm-roberta-base",
    id2label=id2label,
)

model = XLMRobertaModelWithHeads.from_pretrained(
    "xlm-roberta-base",
    config=config,
)

Downloading:   0%|          | 0.00/1.04G [00:00<?, ?B/s]

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModelWithHeads: ['lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaModelWithHeads were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for pr

In [14]:
# Load the pre-trained adapters we want to fuse
model.load_adapter("/content/drive/Shareddrives/FYP-CodeStars/Implementation/TrainedAdapters/task_adapter_hate speech", load_as="hate", with_head=False)
model.load_adapter("/content/drive/Shareddrives/FYP-CodeStars/Implementation/TrainedAdapters/task_adapter_humor", load_as="humor", with_head=False)
model.load_adapter("/content/drive/Shareddrives/FYP-CodeStars/Implementation/TrainedAdapters/task_adapter_sentiment", load_as="sentiment", with_head=False)

# Add a fusion layer for all loaded adapters
adapter_setup = Fuse("hate", "humor", "sentiment")
model.add_adapter_fusion(adapter_setup)
model.set_active_adapters(adapter_setup)

# Add a classification head for target task
model.add_classification_head("cb", num_labels=len(id2label))

Overwriting existing adapter 'hate'.
Overwriting existing adapter 'humor'.


In [15]:
# Unfreeze and activate fusion setup
model.train_adapter_fusion(adapter_setup)

In [16]:
training_args = TrainingArguments(
    learning_rate = 5e-5,
    num_train_epochs = 3,
    per_device_train_batch_size = 32,
    per_device_eval_batch_size = 32,
    logging_steps = 200,
    output_dir = "./training_output",
    overwrite_output_dir = True,
    # The next line is important to ensure the dataset labels are properly passed to the model
    remove_unused_columns = False,
)

In [17]:
def compute_metrics(eval_pred):
    metric1 = load_metric("precision")
    metric2 = load_metric("recall")
    metric3 = load_metric("f1")
    metric4 = load_metric("accuracy")
    
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    precision = metric1.compute(predictions=predictions, references=labels, average="weighted")["precision"]
    recall = metric2.compute(predictions=predictions, references=labels, average="weighted")["recall"]
    f1 = metric3.compute(predictions=predictions, references=labels, average="weighted")["f1"]
    accuracy = metric4.compute(predictions=predictions, references=labels)["accuracy"]
    macro_precision = metric1.compute(predictions=predictions, references=labels, average="macro")["precision"]
    macro_recall = metric2.compute(predictions=predictions, references=labels, average="macro")["recall"]
    macro_f1 = metric3.compute(predictions=predictions, references=labels, average="macro")["f1"]
    return {"accuracy":accuracy, "precision": precision, "recall": recall, "f1": f1, "macro_precision": macro_precision, "macro_recall": macro_recall, "macro_f1": macro_f1}

In [19]:
trainer = AdapterTrainer(
    model = model,
    args = training_args,
    train_dataset = train_dataset,
    eval_dataset = test_dataset,
    compute_metrics = compute_metrics,
)

In [20]:
trainer.train()

***** Running training *****
  Num examples = 12166
  Num Epochs = 3
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1143


Step,Training Loss
200,0.2699
400,0.2171
600,0.2145
800,0.2168
1000,0.214


Saving model checkpoint to ./training_output/checkpoint-500
Configuration saved in ./training_output/checkpoint-500/hate/adapter_config.json
Module weights saved in ./training_output/checkpoint-500/hate/pytorch_adapter.bin
Configuration saved in ./training_output/checkpoint-500/humor/adapter_config.json
Module weights saved in ./training_output/checkpoint-500/humor/pytorch_adapter.bin
Configuration saved in ./training_output/checkpoint-500/sentiment/adapter_config.json
Module weights saved in ./training_output/checkpoint-500/sentiment/pytorch_adapter.bin
Configuration saved in ./training_output/checkpoint-500/hate,humor/adapter_fusion_config.json
Module weights saved in ./training_output/checkpoint-500/hate,humor/pytorch_model_adapter_fusion.bin
Configuration saved in ./training_output/checkpoint-500/hate,humor,sentiment/adapter_fusion_config.json
Module weights saved in ./training_output/checkpoint-500/hate,humor,sentiment/pytorch_model_adapter_fusion.bin
Configuration saved in ./trai

TrainOutput(global_step=1143, training_loss=0.22357689364375719, metrics={'train_runtime': 2240.6172, 'train_samples_per_second': 16.289, 'train_steps_per_second': 0.51, 'total_flos': 3088239608982528.0, 'train_loss': 0.22357689364375719, 'epoch': 3.0})

Validation

In [21]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 1352
  Batch size = 32


Downloading:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.07k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

{'epoch': 3.0,
 'eval_accuracy': 0.9408284023668639,
 'eval_f1': 0.9311673091571737,
 'eval_loss': 0.19496497511863708,
 'eval_macro_f1': 0.743275703285957,
 'eval_macro_precision': 0.8715687952976088,
 'eval_macro_recall': 0.687528801843318,
 'eval_precision': 0.9343701110324317,
 'eval_recall': 0.9408284023668639,
 'eval_runtime': 37.1589,
 'eval_samples_per_second': 36.384,
 'eval_steps_per_second': 1.157}

In [36]:
# def predict(premise, hypothesis):
#   encoded = tokenizer(premise, hypothesis, return_tensors="pt")
#   if torch.cuda.is_available():
#     encoded.to("cuda")
#   logits = model(**encoded)[0]
#   pred_class = torch.argmax(logits).item()
#   return id2label[pred_class]

# predict("Lol 😅")

classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer, device=training_args.device.index)
classifier("Lol 😅")

[{'label': 'LABEL_1', 'score': 0.6943101286888123}]

In [None]:
model.save_adapter_fusion("./saved", "multinli, qqp, qnli")
model.save_all_adapters("./saved")

!ls -l saved

Configuration saved in ./saved/adapter_fusion_config.json
Module weights saved in ./saved/pytorch_model_adapter_fusion.bin
Configuration saved in ./saved/multinli/adapter_config.json
Module weights saved in ./saved/multinli/pytorch_adapter.bin
Configuration saved in ./saved/qqp/adapter_config.json
Module weights saved in ./saved/qqp/pytorch_adapter.bin
Configuration saved in ./saved/qnli/adapter_config.json
Module weights saved in ./saved/qnli/pytorch_adapter.bin


total 83056
-rw-r--r-- 1 root root      407 Nov 30 09:00 adapter_fusion_config.json
drwxr-xr-x 2 root root     4096 Nov 30 09:00 multinli
-rw-r--r-- 1 root root 85029617 Nov 30 09:00 pytorch_model_adapter_fusion.bin
drwxr-xr-x 2 root root     4096 Nov 30 09:00 qnli
drwxr-xr-x 2 root root     4096 Nov 30 09:00 qqp
