<a href="https://colab.research.google.com/github/HimashiRathnayake/Hate-Speech-Humor-Detection/blob/main/Temp/XLM_R.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Fine Tune XLM-R 
Humor Detection & Hate speech Detection of Sinhala-English Code-Mixed Data.

### **Parameters**

**User Parameters**

In [1]:
technique = "humor" #@param ["humor", "hate speech"]
experiment_no = "1" #@param [] {allow-input: true}
over_sampling_technique = "" #@param ["", "ROS","ADASYN", "SMOTE", "BorderlineSMOTE"]
sampling_strategy = "" #@param [] {allow-input: true}

if technique == "humor" :
  NO_OUTPUT_LAYERS = 2
  tag_set = ["Humorous", "Non-Humorous"]
elif technique == "hate speech":
  NO_OUTPUT_LAYERS = 3
  tag_set = ["Abusive", "Hate-Inducing", "Not offensive"]

In [2]:
MAX_LEN = 128
BATCH_SIZE = 32
LEARNING_RATE = 5e-5 # Janani - 2e-5
EPOCHS = 3 # Janani - 5

**Folder Paths**

In [3]:
dataset_path = "/content/drive/Shareddrives/FYP/corpus/çompleted_draft.csv"
model_save_path = "/content/drive/Shareddrives/FYP/Humor_HateSpeech_detection/XLMR/"+technique+"/"+experiment_no

**Dependencies**

In [4]:
# !pip install transformers
# !pip install sentencepiece
# !pip install datasets
# # !pip install optuna

In [5]:
import re
import time
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification, TrainingArguments, Trainer, AdamW, get_scheduler
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from google.colab import drive
from keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import classification_report
import io
import seaborn as sns
from datasets import load_metric
from collections import Counter
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN, BorderlineSMOTE
import matplotlib.pyplot as plt
% matplotlib inline



In [6]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### **Oversampling**

In [7]:
def apply_oversampling(x, y):

  (unique, counts) = np.unique(y, axis=0, return_counts=True)
  print("Class Distribution Without Oversampling", counts)

  # define oversampling strategy
  if (over_sampling_technique == ""):
    return x, y
  elif (over_sampling_technique == "ROS"):
    if (technique=="humor"):
      oversample = RandomOverSampler(sampling_strategy = float(sampling_strategy))
    else:
      sampling_ratio = sampling_strategy.split(":");
      oversample = RandomOverSampler(ratio = {
          0:int(counts[0]*float(sampling_ratio[0])), 
          1:int(counts[0]*float(sampling_ratio[1])), 
          2:int(counts[0]*float(sampling_ratio[2]))
          })
  elif (over_sampling_technique == "ADASYN"):
    oversample = ADASYN(sampling_strategy="minority")
  elif (over_sampling_technique == "SMOTE"):
    oversample = SMOTE()
  elif (over_sampling_technique == "BorderlineSMOTE"):
    oversample = BorderlineSMOTE()

  # fit and apply the transform
  X_over, y_over = oversample.fit_resample(x, y)

  (unique, counts) = np.unique(y_over, axis=0, return_counts=True)
  print("Class Distribution After Oversampling", counts)

  return X_over, y_over

### **Load & Preprocess Dataset**

In [8]:
def preprocess_texts(sentences):
  sentences = [re.sub(r'http\S+','',s) for s in sentences]
  sentences = [s.replace('#','') for s in sentences]
  sentences = [s + " [SEP] [CLS]" for s in sentences]
  return sentences

In [9]:
all_data = pd.read_csv(dataset_path)

if (technique == "humor"):
  all_data = all_data[['Sentence', 'Humor']]
elif (technique == "hate speech"):
  all_data = all_data[['Sentence', 'Hate_speech']]
else:
  all_data = all_data[['Sentence', 'Offensive']]

all_data.columns = ['Sentence', 'Label']
all_data['Label'], uniq = pd.factorize(all_data['Label'])

X = all_data['Sentence'].values.tolist()
y = all_data['Label'].values.tolist()

X = preprocess_texts(X)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state = 2)
# print(y_train[:5])
# print(X_train[:5])
# X_train = np.array(X_train).reshape(-1, 1)
# X_train, y_train = apply_oversampling(X_train, y_train)
# X_train = [x[0] for x in X_train.tolist()]
# y_train = y_train.tolist()
# print(X_train[:5])
# print(y_train[:5])

##### **Preprocess Data**

In [11]:
tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base", do_lower_case=True) #######################################################uncased

In [12]:
encoded_X_train = tokenizer(X_train, truncation=True, padding=True, max_length=MAX_LEN)
encoded_X_test = tokenizer(X_test, truncation=True, padding=True, max_length=MAX_LEN)



In [13]:
class DatasetObject(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_data = DatasetObject(encoded_X_train, y_train)
validation_data = DatasetObject(encoded_X_test, y_test)

In [14]:
train_sampler = RandomSampler(train_data)
validation_sampler = SequentialSampler(validation_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=BATCH_SIZE)
validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=BATCH_SIZE)

### **Fine-Tuning**

#### **Initialize the model**

In [58]:
model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=NO_OUTPUT_LAYERS)
model.cuda()
print("Done")

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense

Done


In [59]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
torch.cuda.get_device_name(0)

'Tesla K80'

In [60]:
import gc
# del all_data, encoded_X_train, encoded_X_test, X_train, X_test, tokenizer, DatasetObject
torch.cuda.empty_cache()
gc.collect()

526

#### **Fine-tuning in PyTorch with the Trainer API**

##### **Fine-tune & Test the model**

In [None]:
def compute_metrics(eval_pred):
    metric1 = load_metric("precision")
    metric2 = load_metric("recall")
    metric3 = load_metric("f1")
    metric4 = load_metric("accuracy")
    
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    precision = metric1.compute(predictions=predictions, references=labels, average="weighted")["precision"]
    recall = metric2.compute(predictions=predictions, references=labels, average="weighted")["recall"]
    f1 = metric3.compute(predictions=predictions, references=labels, average="weighted")["f1"]
    accuracy = metric4.compute(predictions=predictions, references=labels)["accuracy"]
    macro_precision = metric1.compute(predictions=predictions, references=labels, average="macro")["precision"]
    macro_recall = metric2.compute(predictions=predictions, references=labels, average="macro")["recall"]
    macro_f1 = metric3.compute(predictions=predictions, references=labels, average="macro")["f1"]
    return {"accuracy":accuracy, "precision": precision, "recall": recall, "f1": f1, "macro_precision": macro_precision, "macro_recall": macro_recall, "macro_f1": macro_f1}

In [None]:
# Default Hyperparameters
# training_args = TrainingArguments("test_trainer") 
# learning_rate=5e-5, batch_size=8,  weight_decay=0, num_train_epochs=3

training_args = TrainingArguments(
    learning_rate = LEARNING_RATE,
    per_device_train_batch_size = BATCH_SIZE,
    per_device_eval_batch_size = BATCH_SIZE,
    output_dir = model_save_path,
    num_train_epochs = NO_EPOCHS,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
)

In [None]:
trainer = Trainer(
    model = model, 
    args = training_args, 
    train_dataset = train_dataset,
    eval_dataset = test_dataset,
    compute_metrics = compute_metrics,
)

**Train the model**

In [None]:
trainer.train()

***** Running training *****
  Num examples = 13742
  Num Epochs = 3
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1290


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: ignored

**Test the fine-tuned model**

In [None]:
trainer.evaluate()

##### **Hyperparameter Search**

In [None]:
args = TrainingArguments(
    "test-glue",
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=NO_EPOCHS,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="eval_macro_f1",
)

In [None]:
def my_hp_space(trial):
    return {
        # "learning_rate": trial.suggest_float("learning_rate", 1e-7, 1e-3, log=True),
        "num_train_epochs": trial.suggest_discrete_uniform("num_train_epochs", 1, 5, 1),
        # "seed": trial.suggest_int("seed", 1, 40),
        # "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [4, 8, 16, 32, 64]),
    }

In [None]:
trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    # tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

best_run = trainer.hyperparameter_search(n_trials=5, direction="maximize", hp_space=my_hp_space)

#### **Fine tuning with native PyTorch**

**Fine-tune the model**

In [61]:
def epoch_time(start_time, end_time):
  elapsed_time = end_time - start_time
  elapsed_mins = int(elapsed_time / 60)
  elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
  return elapsed_mins, elapsed_secs

In [62]:
# apply different hyperpameters for specific parameter groups
# param_optimizer = list(model.named_parameters())

# optimizer_grouped_parameters = [
#     {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
#      'weight_decay_rate': 0.01},
#     {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
#      'weight_decay_rate': 0.0}
# ]
# optimizer = AdamW(optimizer_grouped_parameters, lr=LEARNING_RATE)
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

In [None]:
train_loss_set = []
num_training_steps = EPOCHS * len(train_dataloader)
progress_bar = tqdm(range(num_training_steps))
model.train()

for epoch in range(EPOCHS):
  start_time = time.time()

  tr_loss = 0
  nb_tr_steps = 0
  
  for batch in train_dataloader:

    batch = {k: v.to(device) for k, v in batch.items()}
    outputs = model(**batch)
    loss = outputs.loss
    loss.backward()

    optimizer.step()
    optimizer.zero_grad()
    progress_bar.update(1)

    train_loss_set.append(loss.item())    
    
    tr_loss += loss.item()
    nb_tr_steps += 1

  end_time = time.time()

  print(epoch_time(start_time,end_time))
  print("\nTrain loss: {}".format(tr_loss/nb_tr_steps))


100%|██████████| 1143/1143 [28:02<00:00,  1.47s/it]

  0%|          | 1/1143 [00:01<26:18,  1.38s/it][A
  0%|          | 2/1143 [00:02<26:16,  1.38s/it][A
  0%|          | 3/1143 [00:04<26:17,  1.38s/it][A
  0%|          | 4/1143 [00:05<26:20,  1.39s/it][A
  0%|          | 5/1143 [00:06<26:17,  1.39s/it][A
  1%|          | 6/1143 [00:08<26:14,  1.39s/it][A
  1%|          | 7/1143 [00:09<26:10,  1.38s/it][A
  1%|          | 8/1143 [00:11<26:10,  1.38s/it][A
  1%|          | 9/1143 [00:12<26:09,  1.38s/it][A
  1%|          | 10/1143 [00:13<26:06,  1.38s/it][A
  1%|          | 11/1143 [00:15<26:03,  1.38s/it][A
  1%|          | 12/1143 [00:16<26:01,  1.38s/it][A
  1%|          | 13/1143 [00:17<26:02,  1.38s/it][A
  1%|          | 14/1143 [00:19<26:03,  1.38s/it][A
  1%|▏         | 15/1143 [00:20<25:59,  1.38s/it][A
  1%|▏         | 16/1143 [00:22<25:59,  1.38s/it][A
  1%|▏         | 17/1143 [00:23<25:56,  1.38s/it][A
  2%|▏         | 18/1143 [00:24<25:53,  1.38s/it][A
 

(8, 41)

Train loss: 0.2646889275482555



 33%|███▎      | 382/1143 [08:42<14:48,  1.17s/it][A
 34%|███▎      | 383/1143 [08:43<15:33,  1.23s/it][A
 34%|███▎      | 384/1143 [08:45<16:05,  1.27s/it][A
 34%|███▎      | 385/1143 [08:46<16:26,  1.30s/it][A
 34%|███▍      | 386/1143 [08:47<16:41,  1.32s/it][A
 34%|███▍      | 387/1143 [08:49<16:50,  1.34s/it][A
 34%|███▍      | 388/1143 [08:50<16:56,  1.35s/it][A
 34%|███▍      | 389/1143 [08:52<17:01,  1.35s/it][A
 34%|███▍      | 390/1143 [08:53<17:02,  1.36s/it][A
 34%|███▍      | 391/1143 [08:54<17:03,  1.36s/it][A
 34%|███▍      | 392/1143 [08:56<17:03,  1.36s/it][A
 34%|███▍      | 393/1143 [08:57<17:03,  1.37s/it][A
 34%|███▍      | 394/1143 [08:58<17:03,  1.37s/it][A
 35%|███▍      | 395/1143 [09:00<17:03,  1.37s/it][A
 35%|███▍      | 396/1143 [09:01<17:02,  1.37s/it][A
 35%|███▍      | 397/1143 [09:03<17:02,  1.37s/it][A
 35%|███▍      | 398/1143 [09:04<16:59,  1.37s/it][A
 35%|███▍      | 399/1143 [09:05<16:58,  1.37s/it][A
 35%|███▍      | 400/1143 [

(8, 40)

Train loss: 0.21376718298148295



 67%|██████▋   | 763/1143 [17:22<07:23,  1.17s/it][A
 67%|██████▋   | 764/1143 [17:24<07:45,  1.23s/it][A
 67%|██████▋   | 765/1143 [17:25<08:00,  1.27s/it][A
 67%|██████▋   | 766/1143 [17:26<08:09,  1.30s/it][A
 67%|██████▋   | 767/1143 [17:28<08:15,  1.32s/it][A
 67%|██████▋   | 768/1143 [17:29<08:20,  1.33s/it][A
 67%|██████▋   | 769/1143 [17:30<08:22,  1.34s/it][A
 67%|██████▋   | 770/1143 [17:32<08:23,  1.35s/it][A
 67%|██████▋   | 771/1143 [17:33<08:24,  1.36s/it][A
 68%|██████▊   | 772/1143 [17:35<08:24,  1.36s/it][A
 68%|██████▊   | 773/1143 [17:36<08:24,  1.36s/it][A
 68%|██████▊   | 774/1143 [17:37<08:24,  1.37s/it][A
 68%|██████▊   | 775/1143 [17:39<08:23,  1.37s/it][A
 68%|██████▊   | 776/1143 [17:40<08:23,  1.37s/it][A
 68%|██████▊   | 777/1143 [17:41<08:20,  1.37s/it][A
 68%|██████▊   | 778/1143 [17:43<08:19,  1.37s/it][A
 68%|██████▊   | 779/1143 [17:44<08:17,  1.37s/it][A
 68%|██████▊   | 780/1143 [17:46<08:16,  1.37s/it][A
 68%|██████▊   | 781/1143 [

**Validate the model**

In [63]:
accuracy = load_metric("accuracy")
precision = load_metric("precision")
recall = load_metric("recall")
f1 = load_metric("f1")
macro_precision = load_metric("precision")
macro_recall = load_metric("recall")
macro_f1 = load_metric("f1")

model.eval()
for batch in validation_dataloader:
    batch = {k: v.to(device) for k, v in batch.items()}
    with torch.no_grad():
        outputs = model(**batch)

    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1)
    accuracy.add_batch(predictions=predictions, references=batch["labels"])
    precision.add_batch(predictions=predictions, references=batch["labels"])
    recall.add_batch(predictions=predictions, references=batch["labels"])
    f1.add_batch(predictions=predictions, references=batch["labels"])
    macro_precision.add_batch(predictions=predictions, references=batch["labels"])
    macro_recall.add_batch(predictions=predictions, references=batch["labels"])
    macro_f1.add_batch(predictions=predictions, references=batch["labels"])

print(accuracy.compute())
print(precision.compute(average="weighted"))
print(recall.compute(average="weighted"))
print(f1.compute(average="weighted"))
print("macro averages:")
print(macro_precision.compute(average="macro"))
print(macro_recall.compute(average="macro"))
print(macro_f1.compute(average="macro"))

{'accuracy': 0.11168639053254438}
{'precision': 0.012473849830188018}
{'recall': 0.11168639053254438}
{'f1': 0.02244131067254052}
macro averages:
{'precision': 0.05584319526627219}
{'recall': 0.5}
{'f1': 0.10046573519627411}


  _warn_prf(average, modifier, msg_start, len(result))


In [65]:
print(np.mean([0.9312130177514792, 0.9142011834319527, 0.8883136094674556, 0.9282544378698225, 0.9349112426035503]))
print(np.mean([0.9240920952413699, 0.8357638037883829, 0.7891010687650993, 0.9192849828323595, 0.9299954483386437]))
print(np.mean([0.9312130177514792, 0.9142011834319527, 0.8883136094674556, 0.9282544378698225, 0.9349112426035503]))
print(np.mean([0.9251771873648115, 0.8732246234326844, 0.8357733215592746, 0.919419550184051, 0.9225245299977529]))
print(np.mean([0.8341697481951047, 0.45710059171597633, 0.4441568047337278, 0.8302427564604542, 0.8923076923076922]))
print(np.mean([0.7341065954467794, 0.50000000000000000, 0.5, 0.6990998363338788, 0.6741620258484817]))
print(np.mean([0.7727373780643203, 0.47758887171561054, 0.4704269486878182, 0.7440068710410993, 0.7325949367088608]))

0.9193786982248522
0.879647479793171
0.9193786982248522
0.8952238425077148
0.6915955186825911
0.621473691525828
0.6394710012435418
