In [1]:
!pip install -qq transformers
!pip install -qq sentencepiece
!pip install -qq datasets


[K     |████████████████████████████████| 3.5 MB 14.5 MB/s 
[K     |████████████████████████████████| 67 kB 6.2 MB/s 
[K     |████████████████████████████████| 895 kB 75.8 MB/s 
[K     |████████████████████████████████| 6.8 MB 75.6 MB/s 
[K     |████████████████████████████████| 596 kB 75.2 MB/s 
[K     |████████████████████████████████| 1.2 MB 14.0 MB/s 
[K     |████████████████████████████████| 311 kB 13.5 MB/s 
[K     |████████████████████████████████| 243 kB 73.6 MB/s 
[K     |████████████████████████████████| 1.1 MB 75.8 MB/s 
[K     |████████████████████████████████| 133 kB 70.0 MB/s 
[K     |████████████████████████████████| 144 kB 67.2 MB/s 
[K     |████████████████████████████████| 94 kB 3.8 MB/s 
[K     |████████████████████████████████| 271 kB 66.4 MB/s 
[?25h

In [2]:
import transformers
import datasets
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW, get_linear_schedule_with_warmup,Trainer, TrainingArguments
from transformers.file_utils import is_tf_available, is_torch_available, is_torch_tpu_available
import torch
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
import random
from textwrap import wrap
from datetime import datetime
from datasets import load_from_disk
from datasets import Dataset
from sklearn.metrics import accuracy_score,classification_report, confusion_matrix
from sklearn.metrics import precision_recall_fscore_support

In [3]:
from torch import nn

In [4]:
# the model we gonna train, base uncased BERT
# check text classification models here: https://huggingface.co/models?filter=text-classification
MODEL_NAME = "distilbert-base-uncased"
# max sequence length for each document/sentence sample
MAX_LENGTH = 64
BATCH_SIZE = 16
EPOCHS = 3
LEARNING_RATE= 6.58e-5
WEIGHT_DECAY = 0.289
WARMUP_STEPS = 464
RANDOM_SEED=22


device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [5]:


def set_seed(seed):
    """Set all seeds to make results reproducible (deterministic mode).
       When seed is None, disables deterministic mode.
    :param seed: an integer to your choosing
    """
    if seed is not None:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        np.random.seed(seed)
        random.seed(seed)

def compute_metrics(pred):
  labels = pred.label_ids
  preds = pred.predictions.argmax(-1)
  # calculate accuracy using sklearn's function
  acc = accuracy_score(labels, preds)
  precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
  acc = accuracy_score(labels, preds)
  confusion_matrix = classification_report(labels, preds, digits=4,output_dict=True)
  return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'hate_f1': confusion_matrix["0"]["f1-score"],
        'hate_recall': confusion_matrix["0"]["recall"],
        'hate_precision': confusion_matrix["0"]["precision"],
        'offensive_f1': confusion_matrix["1"]["f1-score"],
        'offensive_recall': confusion_matrix["1"]["recall"],
        'offensive_precision': confusion_matrix["1"]["precision"],
        'normal_f1': confusion_matrix["2"]["f1-score"],
        'normal_recall': confusion_matrix["2"]["recall"],
        'normal_precision': confusion_matrix["2"]["precision"],    
  }


def model_init():
  temp_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME,num_labels=3).to(device)
  return temp_model

# Code modified from Stabilizer library to handle DistilBERT architecture
#https://github.com/flowerpot-ai/stabilizer


def get_optimizer_parameters_with_llrd(model, peak_lr, multiplicative_factor):
    num_encoder_layers = len(model.distilbert.transformer.layer)
    # Task specific layer gets the peak_lr
    tsl_parameters = [
        {
            "params": [param for name, param in model.named_parameters() if 'distilbert' not in name],
            "param_names": [name for name, param in model.named_parameters() if 'distilbert' not in name],
            "lr": peak_lr,
            "name": "tsl",
        }
    ]

    # Starting from the last encoder layer each encoder layers get a lr defined by
    # current_layer_lr = prev_layer_lr * multiplicative_factor
    # the last encoder layer lr = peak_lr * multiplicative_factor
    encoder_parameters = [
        {
            "params": [param for name, param in model.named_parameters() if f"distilbert.transformer.layer.{layer_num}" in name],
            "param_names": [name for name, param in model.named_parameters() if f"distilbert.transformer.layer.{layer_num}" in name],
            "lr": peak_lr * (multiplicative_factor ** (num_encoder_layers - layer_num)),
            "name": f"layer_{layer_num}",
        }
        for layer_num, layer in enumerate(model.distilbert.transformer.layer)
    ]

    # Embedding layer gets embedding layer lr = first encoder layer lr * multiplicative_factor
    embedding_parameters = [
        {
            "params": [param for name, param in model.named_parameters() if 'embeddings' in name],
            "param_names": [name for name, param in model.named_parameters() if 'embeddings' in name],
            "lr": peak_lr * (multiplicative_factor ** (num_encoder_layers + 1)),
            "name": "embedding",
        }
    ]
    return tsl_parameters + encoder_parameters + embedding_parameters


def timestamp():
    dateTimeObj = datetime.now()
    timestampStr = dateTimeObj.strftime("%d-%b-%Y (%H:%M:%S.%f)")
    print(timestampStr)

In [6]:
set_seed(RANDOM_SEED)


In [7]:
hatetwit_dataset_dfs = load_from_disk('/content/drive/MyDrive/Dissertation/datasets/hatetwit_'+str(1))
train_dataset = hatetwit_dataset_dfs ["train"].remove_columns(["input_ids_bert","attention_mask_bert","token_type_ids_bert"])
eval_dataset = hatetwit_dataset_dfs ["validation"].remove_columns(["input_ids_bert","attention_mask_bert","token_type_ids_bert"])
test_dataset = hatetwit_dataset_dfs ["test"].remove_columns(["input_ids_bert","attention_mask_bert","token_type_ids_bert"])

In [8]:
training_args = TrainingArguments(
    output_dir='/content/drive/MyDrive/Dissertation/disbert_hate_llrd/results',          # output directory
    num_train_epochs=EPOCHS,              # total number of training epochs
    save_strategy ="epoch" ,
    per_device_train_batch_size=BATCH_SIZE,  # batch size per device during training
    per_device_eval_batch_size=BATCH_SIZE,   # batch size for evaluation
    warmup_steps = WARMUP_STEPS,
    weight_decay= WEIGHT_DECAY,               # strength of weight decay
    learning_rate= LEARNING_RATE, 
    logging_dir='./disbert_hate/logs',     # directory for storing logs
    load_best_model_at_end=True,     # load the best model when finished training (default metric is loss)
    evaluation_strategy="epoch",
)

In [9]:
model = model_init()
trainer_one = Trainer(
    model =model,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=eval_dataset,          # evaluation dataset
    compute_metrics=compute_metrics,     # the callback that computes metrics of interest
    #optimizers = (optimizer,scheduler)
)
trainer_one.create_optimizer()
parameters = get_optimizer_parameters_with_llrd(model, LEARNING_RATE, 0.9)

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/256M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.weight', 'pre_classi

In [10]:
model = model_init()
trainer_alpha = Trainer(
    model =model,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=eval_dataset,          # evaluation dataset
    compute_metrics=compute_metrics,     # the callback that computes metrics of interest
    #optimizers = (optimizer,scheduler)
)
trainer_alpha.create_optimizer()
parameters = get_optimizer_parameters_with_llrd(model, LEARNING_RATE, 0.85)
trainer_alpha.optimizer = AdamW(parameters, lr=LEARNING_RATE,weight_decay=WEIGHT_DECAY)
trainer_alpha.train()
trainer_alpha.evaluate(test_dataset)

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.16.2",
  "vocab_size": 30522
}

loading we

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.5556,0.529095,0.785791,0.742059,0.740721,0.751317,0.729888,0.816901,0.659626,0.860329,0.842651,0.878764,0.63596,0.594398,0.683771
2,0.4379,0.481802,0.800816,0.752736,0.760185,0.752808,0.765977,0.81992,0.718695,0.871701,0.880415,0.863158,0.620531,0.558091,0.698701
3,0.3071,0.523752,0.807684,0.765247,0.765057,0.76724,0.782651,0.816901,0.751156,0.875347,0.874861,0.875834,0.637744,0.609959,0.668182


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: __index_level_0__, sentence.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: __index_level_0__, sentence.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-4660
Configuration saved in /content/drive/MyDrive/Disser

{'epoch': 3.0,
 'eval_accuracy': 0.7917561185057965,
 'eval_f1': 0.7440340099094213,
 'eval_hate_f1': 0.7636022514071295,
 'eval_hate_precision': 0.7146619841966637,
 'eval_hate_recall': 0.8197381671701913,
 'eval_loss': 0.5254986882209778,
 'eval_normal_f1': 0.607536231884058,
 'eval_normal_precision': 0.6894736842105263,
 'eval_normal_recall': 0.5430051813471503,
 'eval_offensive_f1': 0.8609635464370765,
 'eval_offensive_precision': 0.8517578832910475,
 'eval_offensive_recall': 0.8703703703703703,
 'eval_precision': 0.7519645172327459,
 'eval_recall': 0.7443712396292373,
 'eval_runtime': 3.8567,
 'eval_samples_per_second': 1207.783,
 'eval_steps_per_second': 75.713}

In [None]:
model = model_init()
trainer_beta = Trainer(
    model =model,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=eval_dataset,          # evaluation dataset
    compute_metrics=compute_metrics,     # the callback that computes metrics of interest
    #optimizers = (optimizer,scheduler)
)
trainer_beta.create_optimizer()
parameters = get_optimizer_parameters_with_llrd(model, LEARNING_RATE, 0.75)
trainer_beta.optimizer = AdamW(parameters, lr=LEARNING_RATE,weight_decay=WEIGHT_DECAY)
trainer_beta.train()
trainer_beta.evaluate(test_dataset)

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.16.2",
  "vocab_size": 30522
}

loading we

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.5606,0.540648,0.771625,0.728088,0.722939,0.741202,0.720071,0.813883,0.64565,0.848891,0.821548,0.878116,0.615301,0.588174,0.645051
2,0.4581,0.490049,0.799957,0.752008,0.759881,0.748434,0.759592,0.78672,0.734272,0.871291,0.885968,0.857092,0.625142,0.572614,0.688279
3,0.3494,0.507668,0.805752,0.762921,0.762188,0.766818,0.765234,0.814889,0.721282,0.875442,0.870418,0.880524,0.648087,0.615145,0.684758


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: __index_level_0__, sentence.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: __index_level_0__, sentence.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-4660
Configuration saved in /content/drive/MyDrive/Disser

{'epoch': 3.0,
 'eval_accuracy': 0.791970802919708,
 'eval_f1': 0.7450416408981178,
 'eval_hate_f1': 0.7542087542087542,
 'eval_hate_precision': 0.7219152854511971,
 'eval_hate_recall': 0.7895266868076536,
 'eval_loss': 0.5253387093544006,
 'eval_normal_f1': 0.6192373363688105,
 'eval_normal_precision': 0.6868686868686869,
 'eval_normal_recall': 0.5637305699481865,
 'eval_offensive_f1': 0.8616788321167883,
 'eval_offensive_precision': 0.8492805755395684,
 'eval_offensive_recall': 0.8744444444444445,
 'eval_precision': 0.7526881826198174,
 'eval_recall': 0.7425672337334283,
 'eval_runtime': 3.6689,
 'eval_samples_per_second': 1269.583,
 'eval_steps_per_second': 79.587}

In [None]:
model = model_init()
trainer_theta = Trainer(
    model =model,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=eval_dataset,          # evaluation dataset
    compute_metrics=compute_metrics,     # the callback that computes metrics of interest
    #optimizers = (optimizer,scheduler)
)
trainer_theta.create_optimizer()
parameters = get_optimizer_parameters_with_llrd(model, LEARNING_RATE, 0.95)
trainer_theta.optimizer = AdamW(parameters, lr=LEARNING_RATE,weight_decay=WEIGHT_DECAY)
trainer_theta.train()
trainer_theta.evaluate(test_dataset)

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.16.2",
  "vocab_size": 30522
}

loading we

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.5601,0.522034,0.791157,0.74453,0.747597,0.74407,0.73025,0.762575,0.700555,0.865721,0.870048,0.861437,0.637617,0.599585,0.680801
2,0.4306,0.483114,0.809401,0.761718,0.773184,0.755761,0.774951,0.796781,0.754286,0.877961,0.898926,0.857951,0.632243,0.571577,0.707317
3,0.2839,0.552262,0.811118,0.769713,0.76877,0.772498,0.784822,0.821932,0.750919,0.877831,0.875231,0.880447,0.646486,0.620332,0.674944


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: __index_level_0__, sentence.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: __index_level_0__, sentence.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-4660
Configuration saved in /content/drive/MyDrive/Disser

{'epoch': 3.0,
 'eval_accuracy': 0.7990553885787892,
 'eval_f1': 0.7532255278894512,
 'eval_hate_f1': 0.7676767676767677,
 'eval_hate_precision': 0.7348066298342542,
 'eval_hate_recall': 0.8036253776435045,
 'eval_loss': 0.5167168974876404,
 'eval_normal_f1': 0.6264367816091954,
 'eval_normal_precision': 0.7032258064516129,
 'eval_normal_recall': 0.5647668393782384,
 'eval_offensive_f1': 0.8655630343823904,
 'eval_offensive_precision': 0.8505541651769754,
 'eval_offensive_recall': 0.8811111111111111,
 'eval_precision': 0.7628622004876141,
 'eval_recall': 0.7498344427109513,
 'eval_runtime': 3.7025,
 'eval_samples_per_second': 1258.077,
 'eval_steps_per_second': 78.866}

In [None]:
model = model_init()
trainer_one = Trainer(
    model =model,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=eval_dataset,          # evaluation dataset
    compute_metrics=compute_metrics,     # the callback that computes metrics of interest
    #optimizers = (optimizer,scheduler)
)
trainer_one.create_optimizer()
parameters = get_optimizer_parameters_with_llrd(model, LEARNING_RATE, 0.9)

In [None]:
trainer_one.optimizer = AdamW(parameters, lr=LEARNING_RATE,weight_decay=WEIGHT_DECAY)



In [None]:
trainer_one.train()


The following columns in the training set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence.
***** Running training *****
  Num examples = 37265
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 6990


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.5591,0.534393,0.781713,0.740093,0.735141,0.751714,0.724572,0.809859,0.655537,0.856053,0.831174,0.882469,0.639654,0.614108,0.667418
2,0.4378,0.480476,0.805538,0.755238,0.768154,0.750819,0.763045,0.801811,0.727854,0.877377,0.896705,0.858865,0.625293,0.553942,0.717742
3,0.2963,0.542798,0.80704,0.76387,0.762546,0.767495,0.778255,0.820926,0.739801,0.876697,0.87264,0.880792,0.636659,0.608921,0.667045


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-4660
Configuration saved in /content/drive/MyDrive/Disser

TrainOutput(global_step=6990, training_loss=0.46294733574120955, metrics={'train_runtime': 333.3997, 'train_samples_per_second': 335.318, 'train_steps_per_second': 20.966, 'total_flos': 1851182116709760.0, 'train_loss': 0.46294733574120955, 'epoch': 3.0})

In [None]:
timestamp()


13-Feb-2022 (13:11:04.588831)


In [None]:
trainer_one.evaluate(test_dataset)

The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4658
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.7945470158866467,
 'eval_f1': 0.7443414496025772,
 'eval_hate_f1': 0.7698945349952061,
 'eval_hate_precision': 0.7346752058554438,
 'eval_hate_recall': 0.8086606243705942,
 'eval_loss': 0.5204463005065918,
 'eval_normal_f1': 0.5984804208065458,
 'eval_normal_precision': 0.6863270777479893,
 'eval_normal_recall': 0.5305699481865285,
 'eval_offensive_f1': 0.8646493930059794,
 'eval_offensive_precision': 0.846399432422845,
 'eval_offensive_recall': 0.8837037037037037,
 'eval_precision': 0.7558005720087593,
 'eval_recall': 0.7409780920869421,
 'eval_runtime': 5.9248,
 'eval_samples_per_second': 786.186,
 'eval_steps_per_second': 49.284}

In [None]:
timestamp()

13-Feb-2022 (13:11:10.533979)


In [None]:
model =model_init()
parameters = get_optimizer_parameters_with_llrd(model, LEARNING_RATE, 0.8)
trainer_two = Trainer(
    model =model,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=eval_dataset,          # evaluation dataset
    compute_metrics=compute_metrics,     # the callback that computes metrics of interest
)
trainer_two.create_optimizer()
trainer_two.optimizer = AdamW(parameters, lr=LEARNING_RATE,weight_decay=WEIGHT_DECAY)

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.16.2",
  "vocab_size": 30522
}

loading we

In [None]:
trainer_two.train()

The following columns in the training set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence.
***** Running training *****
  Num examples = 37265
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 6990


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.5583,0.529969,0.778064,0.734772,0.72867,0.746141,0.725535,0.801811,0.66251,0.855183,0.830803,0.881037,0.623599,0.605809,0.642464
2,0.4503,0.48671,0.801889,0.754699,0.764162,0.750584,0.764165,0.793763,0.736695,0.871003,0.887449,0.855155,0.628931,0.570539,0.700637
3,0.3295,0.515521,0.803821,0.760911,0.759127,0.765432,0.77178,0.81992,0.72898,0.873602,0.867456,0.879835,0.637351,0.608921,0.668565


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-4660
Configuration saved in /content/drive/MyDrive/Disser

TrainOutput(global_step=6990, training_loss=0.47832156987660945, metrics={'train_runtime': 333.1142, 'train_samples_per_second': 335.606, 'train_steps_per_second': 20.984, 'total_flos': 1851182116709760.0, 'train_loss': 0.47832156987660945, 'epoch': 3.0})

In [None]:
trainer_two.evaluate(test_dataset)

The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4658
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.7934735938170889,
 'eval_f1': 0.7467309178764472,
 'eval_hate_f1': 0.7621776504297995,
 'eval_hate_precision': 0.7247956403269755,
 'eval_hate_recall': 0.8036253776435045,
 'eval_loss': 0.5242156386375427,
 'eval_normal_f1': 0.6161790017211703,
 'eval_normal_precision': 0.6902313624678663,
 'eval_normal_recall': 0.5564766839378238,
 'eval_offensive_f1': 0.8618361014783721,
 'eval_offensive_precision': 0.8495861820798849,
 'eval_offensive_recall': 0.8744444444444445,
 'eval_precision': 0.7548710616249089,
 'eval_recall': 0.7448488353419243,
 'eval_runtime': 3.7187,
 'eval_samples_per_second': 1252.603,
 'eval_steps_per_second': 78.523}

In [None]:
model = model_init()
parameters = get_optimizer_parameters_with_llrd(model, LEARNING_RATE, 0.7)
trainer_three = Trainer(
    model =model,                         # the instantiated Transformers model to be trained
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=eval_dataset,          # evaluation dataset
    compute_metrics=compute_metrics,     # the callback that computes metrics of interest
)

trainer_three.create_optimizer()
trainer_three.optimizer = AdamW(parameters, lr=LEARNING_RATE,weight_decay=WEIGHT_DECAY)

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.16.2",
  "vocab_size": 30522
}

loading we

In [None]:
trainer_three.train()

The following columns in the training set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence.
***** Running training *****
  Num examples = 37265
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 6990


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.562,0.544932,0.772483,0.728405,0.723399,0.740677,0.715884,0.804829,0.644641,0.851031,0.82488,0.878895,0.6183,0.592324,0.646659
2,0.4674,0.495549,0.793303,0.74334,0.754154,0.738063,0.750853,0.774648,0.728477,0.865726,0.885598,0.846726,0.613441,0.553942,0.687259
3,0.3754,0.501198,0.799742,0.75558,0.754741,0.760058,0.758102,0.811871,0.711013,0.871575,0.865605,0.877628,0.637061,0.602697,0.675581


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_llrd/results/checkpoint-4660
Configuration saved in /content/drive/MyDrive/Disser

TrainOutput(global_step=6990, training_loss=0.49917930635771524, metrics={'train_runtime': 332.078, 'train_samples_per_second': 336.653, 'train_steps_per_second': 21.049, 'total_flos': 1851182116709760.0, 'train_loss': 0.49917930635771524, 'epoch': 3.0})

In [None]:
trainer_three.evaluate(test_dataset)

The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4658
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.7887505367110348,
 'eval_f1': 0.7395305948926852,
 'eval_hate_f1': 0.7405614714424009,
 'eval_hate_precision': 0.7129543336439889,
 'eval_hate_recall': 0.770392749244713,
 'eval_loss': 0.5288717746734619,
 'eval_normal_f1': 0.6171560161197467,
 'eval_normal_precision': 0.694300518134715,
 'eval_normal_recall': 0.555440414507772,
 'eval_offensive_f1': 0.8608742971159079,
 'eval_offensive_precision': 0.8435833629576964,
 'eval_offensive_recall': 0.8788888888888889,
 'eval_precision': 0.7502794049121334,
 'eval_recall': 0.7349073508804581,
 'eval_runtime': 3.779,
 'eval_samples_per_second': 1232.608,
 'eval_steps_per_second': 77.27}