<a href="https://colab.research.google.com/github/HimashiRathnayake/CMCS-Text-Classification/blob/main/Adapter_Based_Fine_Tuning/Sentence_Level_Tasks_Adapter_Based_Fine_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Training Single Task Adapters 
For BERT, SinBERT, and XLM-R models

In [None]:
import torch

# Report the accelerator attached to this runtime. The lookup is guarded so the
# cell still runs (and says so) on a CPU-only runtime instead of raising.
print("GPU Device name")
gpu_device_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU available"
gpu_device_name

GPU Device name


'Tesla K80'

### **Parameters**

In [None]:
# Experiment configuration (Colab form fields). Later cells read these
# module-level names, so this cell has to run before them.
model_type = "XLM-R" #@param ["SinBERT", "Bert", "XLM-R"]
technique = "humor" #@param ["humor", "hate speech", "sentiment"]
load_adapter = False #@param {type:"boolean"}
# train = True #@param {type:"boolean"}
unfreeze_model = False #@param {type:"boolean"}
save_adapter = False #@param {type:"boolean"}
oversample_dataset = False #@param {type:"boolean"}
lang_adapter_setting = "stack" #@param ["none", "stack", "parallel"]
random_state = 43 #@param
adapter_config = "pfeiffer" #@param ["houlsby", "pfeiffer"]
over_sampling_technique = "ROS" #@param ["", "ROS","ADASYN", "SMOTE", "BorderlineSMOTE"]
# NOTE(review): with technique == "humor" and "ROS", apply_oversampling parses
# this value with float(), which fails on a colon-separated string. It only
# matters once oversample_dataset is enabled - keep the two settings in sync.
sampling_strategy = "1:0.25:0.25" #@param [] {allow-input: true} 
## Examples: 1:0.25:0.25 for hate speech | 0.5 for humor | 1:1:1:1 or 0.5:1:0.5:0.5 or 0.25:1:0.25:0.25 for sentiment

In [None]:
# Location of a previously trained task adapter for the selected task; it is
# only read when `load_adapter` is enabled.
pretrained_adapter_path = f"/content/drive/Shareddrives/FYP/TrainedAdapters/opt_bert_sinBert_task_adapter_{technique}" #+ "_" + str(random_state)

### Installation

In [None]:
# Colab setup: uncomment these lines to install the packages this notebook
# imports when starting from a fresh runtime.
# !pip install -U adapter-transformers
# !pip install datasets
# !pip install sentencepiece

### Dependencies

In [None]:
from google.colab import drive
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoConfig, AutoModelWithHeads, TrainingArguments, AdapterTrainer, EvalPrediction, TextClassificationPipeline, AdapterConfig, EarlyStoppingCallback, Trainer
from transformers.adapters.composition import Fuse, Stack, Parallel
from datasets import load_metric
from collections import Counter
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN, BorderlineSMOTE

In [None]:
# Mount Google Drive; the dataset and adapter paths used in this notebook all
# live under this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Dataset Preprocessing

In [None]:
def apply_oversampling(x, y):
  """Oversample a training split according to the notebook settings.

  Reads the module-level settings `over_sampling_technique`, `technique`,
  `sampling_strategy` and `random_state`.

  Args:
    x: Feature matrix, handed unchanged to the sampler's `fit_resample`.
    y: Class labels aligned with `x`.

  Returns:
    `(x, y)` unchanged when `over_sampling_technique` is the empty string,
    otherwise the resampled `(X_over, y_over)` pair.

  Raises:
    ValueError: if `over_sampling_technique` or (for "ROS") `technique` is not
      one of the supported options, or if `sampling_strategy` lists fewer
      ratios than the task has classes.

  NOTE(review): the caller in this notebook passes a single column of raw
  sentence strings. ADASYN / SMOTE / BorderlineSMOTE presumably need numeric
  features, so those options likely require vectorised input - confirm.
  """
  _, counts = np.unique(y, axis=0, return_counts=True)
  print("Class Distribution Without Oversampling", counts)

  if over_sampling_technique == "":
    return x, y

  # Every sampler is seeded with the notebook-wide random_state so that a rerun
  # reproduces the same resampled training set.
  if over_sampling_technique == "ROS":
    if technique == "humor":
      # Binary task: a single minority/majority ratio.
      strategy = float(sampling_strategy)
    elif technique in ("hate speech", "sentiment"):
      # Multi-class tasks: one ratio per class, each relative to the size of a
      # reference class (class 0 for hate speech, class 1 for sentiment).
      reference_class, n_classes = {"hate speech": (0, 3), "sentiment": (1, 4)}[technique]
      ratios = [float(part) for part in sampling_strategy.split(":")]
      if len(ratios) < n_classes:
        raise ValueError(
            "sampling_strategy %r must list %d ':'-separated ratios for technique %r"
            % (sampling_strategy, n_classes, technique))
      strategy = {
          label: int(counts[reference_class] * ratios[label])
          for label in range(n_classes)
      }
    else:
      raise ValueError("Unsupported technique for ROS: %r" % (technique,))
    oversample = RandomOverSampler(sampling_strategy=strategy, random_state=random_state)
  elif over_sampling_technique == "ADASYN":
    oversample = ADASYN(sampling_strategy="minority", random_state=random_state)
  elif over_sampling_technique == "SMOTE":
    oversample = SMOTE(random_state=random_state)
  elif over_sampling_technique == "BorderlineSMOTE":
    oversample = BorderlineSMOTE(random_state=random_state)
  else:
    # Previously an unknown name fell through and crashed on an unbound local.
    raise ValueError("Unsupported over_sampling_technique: %r" % (over_sampling_technique,))

  # fit and apply the transform
  X_over, y_over = oversample.fit_resample(x, y)

  _, counts = np.unique(y_over, axis=0, return_counts=True)
  print("Class Distribution After Oversampling", counts)

  return X_over, y_over

In [None]:
# CSV corpus read by the loading cell below.
# NOTE(review): the file name begins with "ç" (c-cedilla), not an ASCII "c" -
# confirm this matches the actual file name on the shared drive.
dataset_path = "/content/drive/Shareddrives/FYP/corpus/çompleted_draft.csv"

In [None]:
# Load the corpus and keep only the sentence text plus the label column of the
# selected task (anything other than humor / hate speech means sentiment).
all_data = pd.read_csv(dataset_path)

if (technique == "humor"):
  label_column = 'Humor'
elif (technique == "hate speech"):
  label_column = 'Hate_speech'
else:
  label_column = 'Sentiment'

# Take an explicit copy: assigning a column on a bare column-subset of the
# original frame triggers pandas' SettingWithCopyWarning.
all_data = all_data[['Sentence', label_column]].copy()
all_data.columns = ['Sentence', 'Label']

# factorize() numbers the classes in order of first appearance in the file;
# `uniq` keeps the original label values, indexed by the assigned integer id.
# NOTE(review): the id2label mapping defined in the Training section is fixed,
# so it presumably relies on that appearance order - confirm against `uniq`.
all_data['Label'], uniq = pd.factorize(all_data['Label'])

X = all_data['Sentence'].values.tolist()
y = all_data['Label'].values.tolist()

In [None]:
# Hold out 10% of the samples; the split is seeded with `random_state`.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=random_state,
)

In [None]:
if oversample_dataset:
  # Reshape the sentences into a single-column 2-D array, resample, then
  # flatten the column back into a plain list of sentences.
  sentence_column = np.array(X_train).reshape(-1, 1)
  sentence_column, y_train = apply_oversampling(sentence_column, y_train)
  X_train = [row[0] for row in sentence_column.tolist()]

## **Tokenizing Dataset**

In [None]:
# Tokenizer checkpoint (and extra options) per base model; any model_type other
# than "SinBERT" or "Bert" falls back to XLM-R.
_TOKENIZER_SOURCES = {
    "SinBERT": ("/content/drive/Shareddrives/FYP/Bert/SinBERT_large", {}),
    "Bert": ("bert-base-uncased", {}),
}
_tokenizer_source, _tokenizer_kwargs = _TOKENIZER_SOURCES.get(
    model_type, ("xlm-roberta-base", {"do_lower_case": True})
)
tokenizer = AutoTokenizer.from_pretrained(_tokenizer_source, **_tokenizer_kwargs)

In [None]:
MAX_LEN = 128

def encode_batch(batch):
  """Tokenize `batch` with the notebook tokenizer, truncating and padding to MAX_LEN."""
  tokenizer_options = {
      "max_length": MAX_LEN,
      "truncation": True,
      "padding": "max_length",
  }
  return tokenizer(batch, **tokenizer_options)

In [None]:
# Tokenize the training split first, then the held-out split.
encoded_X_train, encoded_X_test = (
    encode_batch(sentences) for sentences in (X_train, X_test)
)

In [None]:
class DatasetObject(torch.utils.data.Dataset):
    """Dataset that pairs tokenizer encodings with their labels.

    `encodings` is a mapping from field name to a per-sample sequence;
    `labels` is indexed with the same sample index.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The number of samples is taken from the label sequence.
        return len(self.labels)

    def __getitem__(self, idx):
        # One tensor per encoding field, plus the label under the 'labels' key.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Wrap each tokenized split together with its labels.
train_dataset = DatasetObject(encodings=encoded_X_train, labels=y_train)
test_dataset = DatasetObject(encodings=encoded_X_test, labels=y_test)

### Training

In [None]:
# Class names per task, listed in class-id order. Any technique other than
# humor / hate speech is treated as sentiment.
_TASK_LABEL_NAMES = {
    'humor': ("Non-humorous", "Humorous"),
    'hate speech': ("Not offensive", "Hate-Inducing", "Abusive"),
}
_SENTIMENT_LABEL_NAMES = ("Positive", "Negative", "Neutral", "Conflict")

id2label = dict(enumerate(_TASK_LABEL_NAMES.get(technique, _SENTIMENT_LABEL_NAMES)))
num_labels = len(id2label)

In [None]:
# Resolve the base checkpoint once, then build config and model from it.
# Any model_type other than "SinBERT" or "Bert" falls back to XLM-R.
if model_type == "SinBERT":
  _checkpoint = "/content/drive/Shareddrives/FYP/Bert/SinBERT_large"
elif model_type == "Bert":
  _checkpoint = "bert-base-uncased"
else:
  _checkpoint = "xlm-roberta-base"

config = AutoConfig.from_pretrained(_checkpoint, num_labels=num_labels)
model = AutoModelWithHeads.from_pretrained(_checkpoint, config=config)

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModelWithHeads: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing XLMRobertaModelWithHeads from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModelWithHeads from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaModelWithHeads were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['roberta.embeddings.position_ids']
You should probably TRAIN this model on a down-stream task to be able to use it for pr

In [None]:
# Name under which the task adapter is addressed by the rest of the notebook.
task_adapter_name = "task_"+technique

# Load an adapter
if load_adapter:
  print("loading adapter from", pretrained_adapter_path)
  # load_as pins the adapter to the name the later cells use for the head,
  # activation and training; without it the adapter keeps whatever name it was
  # saved under and those later lookups can miss it.
  model.load_adapter(pretrained_adapter_path, load_as=task_adapter_name, with_head=False)

# Add a new adapter
else:
  print("adding new adapter", adapter_config)
  # Kept in its own variable so the model configuration held in `config` is
  # not overwritten by an adapter configuration.
  if adapter_config == "pfeiffer":
    task_adapter_config = AdapterConfig.load("pfeiffer", reduction_factor=12)
  else:
    task_adapter_config = AdapterConfig.load("houlsby")
  model.add_adapter(task_adapter_name, config=task_adapter_config)

adding new adapter pfeiffer


In [None]:
task_adapter_name = "task_"+technique

# Add a classification head
model.add_classification_head(
  task_adapter_name,
  num_labels=num_labels,
  id2label=id2label
)

# Without Language Adapters
if lang_adapter_setting == "none":
  active_setup = task_adapter_name

else:
  # Load language adapters (before train_adapter below, so that they are
  # frozen together with the rest of the base model).
  lang_adapter_config = AdapterConfig.load("pfeiffer+inv")
  model.load_adapter("/content/drive/Shareddrives/FYP/TrainedAdapters/mlm", config=lang_adapter_config, load_as="si-en", with_head=False)
  # model.load_adapter("/content/drive/Shareddrives/FYP/TrainedAdapters/si_mlm", config=lang_adapter_config, load_as="si", with_head=False)
  model.load_adapter("/content/drive/Shareddrives/FYP/final_adapter/sinbert-lang-adapter/mlm", config=lang_adapter_config, load_as="si", with_head=False)
  en_adapter_config = AdapterConfig.load("pfeiffer", non_linearity="relu", reduction_factor=2)
  model.load_adapter("en/wiki@ukp", config=en_adapter_config)

  # Stack Language Adapters
  if lang_adapter_setting == "stack":
    print("stacking language adapters")
    active_setup = Stack("en", "si", "si-en", task_adapter_name)

  # Parallel Language Adapters
  else:
    print("stacking parallel language adapters set")
    # NOTE(review): with the composition really active, three parallel channels
    # now feed a single task adapter/head - verify this path end-to-end.
    active_setup = Stack(Parallel("en", "si", "si-en"), task_adapter_name)

# Train Adapter
# train_adapter() freezes the base model, enables training for the task adapter
# and also makes that adapter the ONLY active one. It therefore has to run
# before the composition is activated; in the opposite order the language
# adapters are silently dropped from the forward pass.
model.train_adapter(task_adapter_name)
model.set_active_adapters(active_setup)

stacking language adapters


In [None]:
# Optionally unfreeze the base model so that it is trained together with the
# adapter instead of the adapter alone.
if unfreeze_model:
  model.freeze_model(freeze=False)

In [None]:
# Trainer settings, collected in one mapping and expanded into TrainingArguments.
_training_options = dict(
    output_dir="./training_output",
    # overwrite_output_dir=True,
    learning_rate=5e-4,
    num_train_epochs=6,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # logging_steps=200,
    # Keep every dataset column so the labels are properly passed to the model.
    remove_unused_columns=False,
    # Evaluate and checkpoint once per epoch; reload the checkpoint with the
    # best eval_macro_f1 when training ends.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="eval_macro_f1",
)
training_args = TrainingArguments(**_training_options)

# def compute_accuracy(p: EvalPrediction):
#   preds = np.argmax(p.predictions, axis=1)
#   return {"acc": (preds == p.label_ids).mean()}

# Metric objects are loaded once and then reused; loading all four again on
# every evaluation pass repeats the same setup work for no benefit.
_METRIC_CACHE = {}

def _get_metric(name):
    """Return the metric called `name`, loading it on first use only."""
    if name not in _METRIC_CACHE:
        _METRIC_CACHE[name] = load_metric(name)
    return _METRIC_CACHE[name]

def compute_metrics(eval_pred):
    """Compute classification metrics for one evaluation pass.

    Args:
        eval_pred: A `(logits, labels)` pair; the predicted class is the
            argmax of `logits` over the last axis.

    Returns:
        dict with "accuracy", the weighted-average "precision", "recall" and
        "f1", and their macro-averaged counterparts "macro_precision",
        "macro_recall" and "macro_f1".
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    scores = {
        "accuracy": _get_metric("accuracy").compute(predictions=predictions, references=labels)["accuracy"]
    }
    averaged_metrics = ("precision", "recall", "f1")
    # Weighted averages keep the plain metric name ...
    for name in averaged_metrics:
        scores[name] = _get_metric(name).compute(
            predictions=predictions, references=labels, average="weighted")[name]
    # ... macro averages are reported with a "macro_" prefix.
    for name in averaged_metrics:
        scores["macro_" + name] = _get_metric(name).compute(
            predictions=predictions, references=labels, average="macro")[name]
    return scores

# Early stopping is configured with a patience of 2.
# NOTE(review): the held-out test split doubles as the evaluation set here, so
# it also drives early stopping and best-checkpoint selection - confirm intended.
_early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    callbacks=[_early_stopping],
)

In [None]:
# Run training with the trainer configured above.
trainer.train()

***** Running training *****
  Num examples = 12166
  Num Epochs = 6
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 2286


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1,Macro Precision,Macro Recall,Macro F1
1,No log,0.183582,0.936391,0.936822,0.936391,0.923722,0.93975,0.686739,0.75174
2,0.245100,0.172273,0.93713,0.931279,0.93713,0.930651,0.866438,0.740737,0.787059
3,0.203500,0.172765,0.940828,0.935875,0.940828,0.93519,0.877398,0.756185,0.802079
4,0.175000,0.176466,0.940828,0.935875,0.940828,0.93519,0.877398,0.756185,0.802079
5,0.175000,0.197435,0.944527,0.941387,0.944527,0.937909,0.910387,0.751537,0.807233
6,0.152000,0.195219,0.940828,0.935935,0.940828,0.936351,0.866005,0.769582,0.808406


***** Running Evaluation *****
  Num examples = 1352
  Batch size = 32
Saving model checkpoint to ./training_output/checkpoint-381
Configuration saved in ./training_output/checkpoint-381/config.json
Model weights saved in ./training_output/checkpoint-381/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1352
  Batch size = 32
Saving model checkpoint to ./training_output/checkpoint-762
Configuration saved in ./training_output/checkpoint-762/config.json
Model weights saved in ./training_output/checkpoint-762/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1352
  Batch size = 32
Saving model checkpoint to ./training_output/checkpoint-1143
Configuration saved in ./training_output/checkpoint-1143/config.json
Model weights saved in ./training_output/checkpoint-1143/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1352
  Batch size = 32
Saving model checkpoint to ./training_output/checkpoint-1524
Configuration saved in ./training_output/check

In [None]:
# Evaluate on the trainer's eval dataset and display the resulting metrics dict.
results = trainer.evaluate()
results

***** Running Evaluation *****
  Num examples = 1352
  Batch size = 32


{'epoch': 6.0,
 'eval_accuracy': 0.9408284023668639,
 'eval_f1': 0.9363514990456706,
 'eval_loss': 0.1952187418937683,
 'eval_macro_f1': 0.8084063146557832,
 'eval_macro_precision': 0.8660051082359838,
 'eval_macro_recall': 0.7695818710023623,
 'eval_precision': 0.9359346240932435,
 'eval_recall': 0.9408284023668639,
 'eval_runtime': 22.2463,
 'eval_samples_per_second': 60.774,
 'eval_steps_per_second': 1.933}

In [None]:
# import csv

# with open('/content/drive/Shareddrives/FYP/Results/bert_pfeiffer_TA_'+technique+'.csv', 'a') as f:
#     # create the csv writer
#     writer = csv.writer(f)

#     # write a row to the csv file
#     writer.writerow([random_state, results['eval_accuracy'], results['eval_macro_precision'], results['eval_macro_recall'], results['eval_macro_f1']])

In [None]:
# classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer, device=training_args.device.index)

# classifier("Lol😅")

In [None]:
if save_adapter:
  # NOTE(review): the directory name is fixed to "xlmr_..._parallel_" whatever
  # `model_type` / `lang_adapter_setting` are set to - confirm before saving.
  _adapter_save_dir = "/content/drive/Shareddrives/FYP/TrainedAdapters/Final/xlmr_task_adapter_parallel_"+technique
  model.save_adapter(_adapter_save_dir, "task_"+technique)

In [None]:
# classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer, device=training_args.device.index)

# classifier("This is great!"