In [None]:
!pip install -qq transformers
!pip install -qq optuna
!pip install -qq sentencepiece
!pip install -qq datasets
!pip install -qq stabilizer

In [None]:
import transformers
import datasets
from transformers import AutoTokenizer, AutoModelForSequenceClassification,AdamW, get_linear_schedule_with_warmup,Trainer, TrainingArguments
from transformers.file_utils import is_tf_available, is_torch_available, is_torch_tpu_available
import torch
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
import random
from textwrap import wrap
from datetime import datetime
from datasets import load_from_disk
from datasets import Dataset
from sklearn.metrics import accuracy_score,classification_report, confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from stabilizer.reinitialize import reinit_autoencoder_model
from stabilizer.llrd import get_optimizer_parameters_with_llrd

In [None]:
from torch import nn

In [None]:
# Checkpoint to fine-tune: DistilBERT (base, uncased).
# Other text-classification checkpoints: https://huggingface.co/models?filter=text-classification
MODEL_NAME = "distilbert-base-uncased"

# Optimisation hyper-parameters passed to TrainingArguments further down.
EPOCHS = 3
BATCH_SIZE = 16
LEARNING_RATE = 6.58e-5
WEIGHT_DECAY = 0.289
WARMUP_STEPS = 464

# Seed handed to set_seed() so runs are repeatable.
RANDOM_SEED = 22

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def set_seed(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), and puts cuDNN into deterministic mode with benchmarking off.
    Passing ``None`` is a no-op: nothing is seeded and the cuDNN flags are
    left exactly as they were.

    :param seed: integer seed, or ``None`` to leave all RNG state untouched
    """
    if seed is None:
        return

    # Deterministic cuDNN kernels, no auto-tuner picking algorithms per run.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

def compute_metrics(pred):
    """Compute overall and per-class classification metrics for a ``Trainer``.

    :param pred: object exposing ``label_ids`` (gold labels) and
        ``predictions`` (per-class scores; the predicted class is the argmax
        over the last axis)
    :return: dict with ``accuracy``, macro-averaged ``f1`` / ``precision`` /
        ``recall``, and per-class F1 / recall / precision reported under the
        ``hate`` (label 0), ``offensive`` (label 1) and ``normal`` (label 2)
        prefixes
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')

    # Pin the label set so the "0"/"1"/"2" entries exist even when a class is
    # missing from both the gold labels and the predictions (otherwise the
    # lookups below raise KeyError). zero_division=0 keeps the values sklearn
    # already returns for empty classes, without the warning.
    report = classification_report(
        labels, preds, labels=[0, 1, 2], digits=4, output_dict=True, zero_division=0
    )

    metrics = {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
    }
    # Class ids are mapped to the human-readable prefixes used in the logs.
    for class_id, class_name in enumerate(('hate', 'offensive', 'normal')):
        class_scores = report[str(class_id)]
        metrics[class_name + '_f1'] = class_scores["f1-score"]
        metrics[class_name + '_recall'] = class_scores["recall"]
        metrics[class_name + '_precision'] = class_scores["precision"]
    return metrics



def model_init():
  """Load a fresh 3-label sequence classifier from MODEL_NAME and move it to `device`."""
  classifier = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)
  return classifier.to(device)


# Code extracted from the DistilBERT implementation in stabilizer:
# https://github.com/flowerpot-ai/stabilizer

def reinit_autoencoder_model(model, reinit_num_layers=0):
    """Re-initialise the last ``reinit_num_layers`` transformer layers of a DistilBERT model.

    Inside each selected layer, ``nn.Linear`` weights (and trainable
    ``nn.Embedding`` weights) are redrawn from a normal distribution with
    mean 0 and std ``model.config.initializer_range``, ``nn.Linear`` biases
    are zeroed, and ``nn.LayerNorm`` is reset to weight 1 / bias 0. The model
    is modified in place.

    Note: this definition shadows the ``reinit_autoencoder_model`` imported
    from ``stabilizer.reinitialize`` at the top of the notebook.

    :param model: model exposing ``distilbert.transformer.layer`` and
        ``config.initializer_range``
    :param reinit_num_layers: how many layers, counted from the top of the
        encoder, to re-initialise; ``0`` (or ``None``) leaves the model untouched
    :return: the same ``model`` object
    :raises ValueError: if ``reinit_num_layers`` is negative
    """
    if not reinit_num_layers:
        return model

    # A negative count would turn `layer[-n:]` into `layer[|n|:]` and silently
    # re-initialise the wrong layers, so reject it explicitly.
    if reinit_num_layers < 0:
        raise ValueError(
            "reinit_num_layers must be non-negative, got {}".format(reinit_num_layers)
        )

    init_std = model.config.initializer_range
    for layer in model.distilbert.transformer.layer[-reinit_num_layers:]:
        for module in layer.modules():
            if isinstance(module, nn.Embedding):
                # Frozen embeddings are left as they are.
                if module.weight.requires_grad:
                    module.weight.data.normal_(mean=0.0, std=init_std)
            elif isinstance(module, nn.Linear):
                module.weight.data.normal_(mean=0.0, std=init_std)
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.LayerNorm):
                module.bias.data.zero_()
                module.weight.data.fill_(1.0)

    return model

def timestamp():
    """Print the current local date and time, e.g. ``12-Feb-2022 (20:59:22.739699)``."""
    print(datetime.now().strftime("%d-%b-%Y (%H:%M:%S.%f)"))

In [None]:
# Seed all RNGs once, before the tokenizer, datasets and models are created.
set_seed(RANDOM_SEED)




In [None]:
# Load the tokenizer that belongs to the MODEL_NAME checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.10.0",
  "vocab_size": 30522
}

loading file https://huggingface.co/distilbert-base-uncased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/0e1bbfda7f63a99bb52e3915dcf10c3c92122b827d92eb2d34ce94ee79ba486c.d789d64ebf

In [None]:
  # Load the saved dataset from disk and drop the `*_bert` columns from every split.
  hatetwit_dataset_dfs = load_from_disk('/content/drive/MyDrive/Dissertation/datasets/hatetwit_'+str(1))
  bert_only_columns = ["input_ids_bert","attention_mask_bert","token_type_ids_bert"]
  train_dataset, eval_dataset, test_dataset = (
      hatetwit_dataset_dfs[split_name].remove_columns(bert_only_columns)
      for split_name in ("train", "validation", "test")
  )


In [None]:
# Shared by every Trainer in this notebook.
# NOTE: because all runs use the same output_dir, each run overwrites the
# checkpoints written by the previous one.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir='/content/drive/MyDrive/Dissertation/disbert_hate_reinit/results',
    logging_dir='./disbert_hate/logs',
    # Optimisation schedule.
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
    warmup_steps=WARMUP_STEPS,
    # Batch sizes per device for training and evaluation.
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    # Evaluate and checkpoint once per epoch, then reload the best checkpoint
    # (default selection metric is loss) when training finishes.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).


In [None]:
# Baseline run: pretrained weights, no transformer layer re-initialised.
model_l0 = reinit_autoencoder_model(model_init(), 0)
trainer_l0 = Trainer(
    model=model_l0,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,  # accuracy, macro and per-class P/R/F1
)
trainer_l0.train()

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.10.0",
  "vocab_size": 30522
}

loading weights file https://huggingface.co/distilbert-b

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.5752,0.543973,0.786649,0.722384,0.760456,0.705076,0.720895,0.71328,0.728674,0.866019,0.923732,0.815093,0.580239,0.478216,0.7376
2,0.4382,0.513583,0.806182,0.757252,0.772005,0.745982,0.766133,0.746479,0.78685,0.876923,0.907442,0.84839,0.628699,0.584025,0.680774
3,0.2469,0.598478,0.812191,0.770503,0.769972,0.771184,0.783515,0.793763,0.773529,0.881274,0.880785,0.881764,0.646719,0.639004,0.654623


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit//results/10/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit//results/10/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit//results/10/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit//results/10/checkpoint-4660
Configuration saved in /cont

TrainOutput(global_step=6990, training_loss=0.44640944539562655, metrics={'train_runtime': 310.2819, 'train_samples_per_second': 360.301, 'train_steps_per_second': 22.528, 'total_flos': 1851182116709760.0, 'train_loss': 0.44640944539562655, 'epoch': 3.0})

In [None]:
# Print the wall-clock time to mark when this point of the notebook was reached.
timestamp()

12-Feb-2022 (20:59:22.739699)


In [None]:
# Score the l0 run on the test split; metrics come back with an `eval_` prefix.
trainer_l0.evaluate(test_dataset)

The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4658
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.8065693430656934,
 'eval_f1': 0.7600388624161534,
 'eval_hate_f1': 0.7762416794674859,
 'eval_hate_precision': 0.7895833333333333,
 'eval_hate_recall': 0.7633434038267876,
 'eval_loss': 0.5234386324882507,
 'eval_normal_f1': 0.6303982052720135,
 'eval_normal_precision': 0.687041564792176,
 'eval_normal_recall': 0.5823834196891192,
 'eval_offensive_f1': 0.8734767025089605,
 'eval_offensive_precision': 0.8461805555555556,
 'eval_offensive_recall': 0.9025925925925926,
 'eval_precision': 0.774268484560355,
 'eval_recall': 0.7494398053694997,
 'eval_runtime': 3.6877,
 'eval_samples_per_second': 1263.126,
 'eval_steps_per_second': 79.183}

In [None]:
# Print the wall-clock time to mark when this point of the notebook was reached.
timestamp()

In [None]:
# Re-initialise the last transformer layer, then fine-tune.
model_l1 = reinit_autoencoder_model(model_init(), 1)
trainer_l1 = Trainer(
    model=model_l1,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,  # accuracy, macro and per-class P/R/F1
)
trainer_l1.train()

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.10.0",
  "vocab_size": 30522
}

loading weights file https://huggingface.co/distilbert-b

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.5676,0.542159,0.778923,0.737252,0.733916,0.746506,0.715388,0.792757,0.651778,0.85254,0.832655,0.873398,0.643828,0.614108,0.676571
2,0.4287,0.505512,0.809401,0.767283,0.768604,0.76952,0.773826,0.820926,0.731839,0.876088,0.875602,0.876575,0.651934,0.612033,0.6974
3,0.2618,0.571578,0.810474,0.768885,0.76824,0.771652,0.785817,0.82495,0.750229,0.876972,0.874861,0.879092,0.643865,0.615145,0.675399


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-4660
Configuration saved in /content/drive/MyDriv

TrainOutput(global_step=6990, training_loss=0.4498143819609766, metrics={'train_runtime': 310.7695, 'train_samples_per_second': 359.736, 'train_steps_per_second': 22.493, 'total_flos': 1851182116709760.0, 'train_loss': 0.4498143819609766, 'epoch': 3.0})

In [None]:
# Print the wall-clock time to mark when this point of the notebook was reached.
timestamp()


12-Feb-2022 (19:34:30.879907)


In [None]:
# Score the l1 run on the test split; metrics come back with an `eval_` prefix.
trainer_l1.evaluate(test_dataset)

The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4658
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.7930442249892657,
 'eval_f1': 0.7492964771761866,
 'eval_hate_f1': 0.7642585551330798,
 'eval_hate_precision': 0.7236723672367237,
 'eval_hate_recall': 0.8096676737160121,
 'eval_loss': 0.5350803732872009,
 'eval_normal_f1': 0.6218302094818082,
 'eval_normal_precision': 0.6643109540636042,
 'eval_normal_recall': 0.5844559585492228,
 'eval_offensive_f1': 0.8618006669136717,
 'eval_offensive_precision': 0.8621200889547813,
 'eval_offensive_recall': 0.8614814814814815,
 'eval_precision': 0.7500344700850364,
 'eval_recall': 0.7518683712489055,
 'eval_runtime': 3.6964,
 'eval_samples_per_second': 1260.129,
 'eval_steps_per_second': 78.995}

In [None]:
# Print the wall-clock time to mark when this point of the notebook was reached.
timestamp()


12-Feb-2022 (19:34:34.597176)


In [None]:
# Re-initialise the last 2 transformer layers, then fine-tune.
model_l2 = reinit_autoencoder_model(model_init(), 2)
trainer_l2 = Trainer(
    model=model_l2,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,  # accuracy, macro and per-class P/R/F1
)
trainer_l2.train()

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.10.0",
  "vocab_size": 30522
}

loading weights file https://huggingface.co/distilbert-b

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.569,0.547982,0.775918,0.731,0.736677,0.741905,0.713732,0.834004,0.623777,0.851725,0.831544,0.872911,0.627542,0.560166,0.713342
2,0.4306,0.498001,0.802533,0.755148,0.764668,0.748887,0.769766,0.778672,0.761062,0.872083,0.892262,0.852795,0.623596,0.575726,0.680147
3,0.2657,0.597122,0.804035,0.761795,0.760826,0.764666,0.782525,0.81992,0.748393,0.871566,0.869308,0.873837,0.631294,0.604772,0.660249


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-4660
Configuration saved in /content/drive/MyDriv

TrainOutput(global_step=6990, training_loss=0.45234484365569677, metrics={'train_runtime': 310.613, 'train_samples_per_second': 359.917, 'train_steps_per_second': 22.504, 'total_flos': 1851182116709760.0, 'train_loss': 0.45234484365569677, 'epoch': 3.0})

In [None]:
# Print the wall-clock time to mark when this point of the notebook was reached.
timestamp()

12-Feb-2022 (19:39:46.675339)


In [None]:
# Score the l2 run on the test split; metrics come back with an `eval_` prefix.
trainer_l2.evaluate(test_dataset)

The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4658
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.798196650923143,
 'eval_f1': 0.7530348406370205,
 'eval_hate_f1': 0.7754299754299755,
 'eval_hate_precision': 0.7571976967370442,
 'eval_hate_recall': 0.7945619335347432,
 'eval_loss': 0.5186144113540649,
 'eval_normal_f1': 0.6193693693693695,
 'eval_normal_precision': 0.6781750924784217,
 'eval_normal_recall': 0.5699481865284974,
 'eval_offensive_f1': 0.8643051771117165,
 'eval_offensive_precision': 0.8481283422459893,
 'eval_offensive_recall': 0.8811111111111111,
 'eval_precision': 0.7611670438204851,
 'eval_recall': 0.7485404103914505,
 'eval_runtime': 3.6887,
 'eval_samples_per_second': 1262.759,
 'eval_steps_per_second': 79.16}

In [None]:
# Print the wall-clock time to mark when this point of the notebook was reached.
timestamp()

12-Feb-2022 (19:39:50.386979)


In [None]:
# Re-initialise the last 3 transformer layers, then fine-tune.
model_l3 = reinit_autoencoder_model(model_init(), 3)
trainer_l3 = Trainer(
    model=model_l3,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,  # accuracy, macro and per-class P/R/F1
)
trainer_l3.train()

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.10.0",
  "vocab_size": 30522
}

loading weights file https://huggingface.co/distilbert-b

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.5728,0.539937,0.781713,0.734538,0.738398,0.739737,0.728098,0.806841,0.663358,0.856877,0.851166,0.862664,0.618639,0.561203,0.689172
2,0.4399,0.501855,0.802962,0.758845,0.763159,0.758223,0.765468,0.802817,0.731439,0.870843,0.877453,0.864333,0.640223,0.594398,0.693705
3,0.2846,0.584767,0.802747,0.762338,0.760017,0.765778,0.781054,0.812877,0.751628,0.869112,0.864124,0.874157,0.636848,0.620332,0.654267


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-4660
Configuration saved in /content/drive/MyDriv

TrainOutput(global_step=6990, training_loss=0.4660367013388267, metrics={'train_runtime': 312.5988, 'train_samples_per_second': 357.631, 'train_steps_per_second': 22.361, 'total_flos': 1851182116709760.0, 'train_loss': 0.4660367013388267, 'epoch': 3.0})

In [None]:
# Print the wall-clock time to mark when this point of the notebook was reached.
timestamp()

12-Feb-2022 (19:45:04.425282)


In [None]:
# Score the l3 run on the test split; metrics come back with an `eval_` prefix.
trainer_l3.evaluate(test_dataset)

The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4658
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.798626019750966,
 'eval_f1': 0.7525014322460969,
 'eval_hate_f1': 0.7672289156626506,
 'eval_hate_precision': 0.7356746765249538,
 'eval_hate_recall': 0.8016112789526687,
 'eval_loss': 0.5245223045349121,
 'eval_normal_f1': 0.6229508196721312,
 'eval_normal_precision': 0.6853233830845771,
 'eval_normal_recall': 0.5709844559585492,
 'eval_offensive_f1': 0.8673245614035089,
 'eval_offensive_precision': 0.8560606060606061,
 'eval_offensive_recall': 0.8788888888888889,
 'eval_precision': 0.759019555223379,
 'eval_recall': 0.7504948746000356,
 'eval_runtime': 3.6872,
 'eval_samples_per_second': 1263.303,
 'eval_steps_per_second': 79.194}

In [None]:
# Print the wall-clock time to mark when this point of the notebook was reached.
timestamp()

12-Feb-2022 (19:45:08.133371)


In [None]:
# Re-initialise the last 4 transformer layers, then fine-tune.
model_l4 = reinit_autoencoder_model(model_init(), 4)
trainer_l4 = Trainer(
    model=model_l4,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,  # accuracy, macro and per-class P/R/F1
)
trainer_l4.train()

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.10.0",
  "vocab_size": 30522
}

loading weights file https://huggingface.co/distilbert-b

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.5762,0.551037,0.771839,0.72115,0.727027,0.725332,0.713572,0.790744,0.650124,0.851151,0.848945,0.853368,0.598726,0.536307,0.677588
2,0.4471,0.525794,0.797811,0.749874,0.755132,0.751599,0.753731,0.812877,0.702609,0.871105,0.874491,0.867744,0.624786,0.567427,0.695044
3,0.3054,0.575829,0.802533,0.757886,0.759921,0.757667,0.762506,0.789738,0.737089,0.87325,0.877453,0.869087,0.637903,0.605809,0.673587


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-4660
Configuration saved in /content/drive/MyDriv

TrainOutput(global_step=6990, training_loss=0.47607708714039026, metrics={'train_runtime': 314.3891, 'train_samples_per_second': 355.594, 'train_steps_per_second': 22.234, 'total_flos': 1851182116709760.0, 'train_loss': 0.47607708714039026, 'epoch': 3.0})

In [None]:
# Score the l4 run on the test split; metrics come back with an `eval_` prefix.
trainer_l4.evaluate(test_dataset)

The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4658
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.7958351223701159,
 'eval_f1': 0.7499905905706017,
 'eval_hate_f1': 0.75426944971537,
 'eval_hate_precision': 0.7130044843049327,
 'eval_hate_recall': 0.8006042296072508,
 'eval_loss': 0.5490932464599609,
 'eval_normal_f1': 0.6297343131712833,
 'eval_normal_precision': 0.6927860696517413,
 'eval_normal_recall': 0.5772020725388601,
 'eval_offensive_f1': 0.8659680088251517,
 'eval_offensive_precision': 0.859802847754655,
 'eval_offensive_recall': 0.8722222222222222,
 'eval_precision': 0.7551978005704431,
 'eval_recall': 0.7500095081227777,
 'eval_runtime': 3.6786,
 'eval_samples_per_second': 1266.252,
 'eval_steps_per_second': 79.379}

In [None]:
# Re-initialise the last 5 transformer layers, then fine-tune.
model_l5 = reinit_autoencoder_model(model_init(), 5)
trainer_l5 = Trainer(
    model=model_l5,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,  # accuracy, macro and per-class P/R/F1
)
trainer_l5.train()

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.10.0",
  "vocab_size": 30522
}

loading weights file https://huggingface.co/distilbert-b

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.5859,0.562249,0.773986,0.72312,0.72979,0.723905,0.711691,0.768612,0.662619,0.852725,0.85746,0.848041,0.604945,0.545643,0.67871
2,0.4604,0.53587,0.793518,0.747971,0.751059,0.748672,0.750476,0.792757,0.712477,0.864676,0.868197,0.861183,0.628763,0.585062,0.679518
3,0.3373,0.570995,0.794162,0.750821,0.751278,0.751996,0.756286,0.78672,0.728119,0.863855,0.864495,0.863216,0.632321,0.604772,0.6625


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-4660
Configuration saved in /content/drive/MyDriv

TrainOutput(global_step=6990, training_loss=0.4937893388609006, metrics={'train_runtime': 314.8008, 'train_samples_per_second': 355.129, 'train_steps_per_second': 22.205, 'total_flos': 1851182116709760.0, 'train_loss': 0.4937893388609006, 'epoch': 3.0})

In [None]:
# Score trainer_l5 on the test split; the returned metrics dict is the
# cell's displayed output (keys carry the default "eval_" prefix).
trainer_l5.evaluate(eval_dataset=test_dataset)

The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4658
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.787247745813654,
 'eval_f1': 0.7412526746022313,
 'eval_hate_f1': 0.7464114832535885,
 'eval_hate_precision': 0.7110300820419325,
 'eval_hate_recall': 0.7854984894259819,
 'eval_loss': 0.5579251050949097,
 'eval_normal_f1': 0.6182019977802442,
 'eval_normal_precision': 0.6654719235364397,
 'eval_normal_recall': 0.5772020725388601,
 'eval_offensive_f1': 0.8591445427728615,
 'eval_offensive_precision': 0.855359765051395,
 'eval_offensive_recall': 0.8629629629629629,
 'eval_precision': 0.7439539235432558,
 'eval_recall': 0.7418878416426016,
 'eval_runtime': 3.7238,
 'eval_samples_per_second': 1250.858,
 'eval_steps_per_second': 78.414}

In [None]:
# Experiment: fine-tune the model after re-initialising 6 layers.
# The config logged while this cell runs reports "n_layers": 6, so this setting
# presumably resets every transformer block -- NOTE(review): confirm against
# stabilizer's reinit_autoencoder_model semantics.

# Re-seed right before building the model so that any weights drawn at random
# during construction / re-initialisation do not depend on which cells were
# executed earlier in this kernel session (keeps the run reproducible in isolation).
set_seed(RANDOM_SEED)

model_l6 = model_init()
model_l6 = reinit_autoencoder_model(model_l6, 6)

trainer_l6 = Trainer(
    model=model_l6,                   # model instance to fine-tune
    args=training_args,               # TrainingArguments defined earlier in the notebook
    train_dataset=train_dataset,      # training split
    eval_dataset=eval_dataset,        # validation split
    compute_metrics=compute_metrics,  # callback that computes the reported metrics
)
# Last expression: the TrainOutput summary is shown as the cell result.
trainer_l6.train()

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.10.0",
  "vocab_size": 30522
}

loading weights file https://huggingface.co/distilbert-b

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Hate F1,Hate Recall,Hate Precision,Offensive F1,Offensive Recall,Offensive Precision,Normal F1,Normal Recall,Normal Precision
1,0.6059,0.571068,0.764756,0.714698,0.7197,0.718557,0.699772,0.772636,0.639467,0.844667,0.84154,0.847818,0.599655,0.541494,0.671815
2,0.4939,0.557007,0.777849,0.719782,0.739334,0.71275,0.717653,0.750503,0.687558,0.857654,0.886709,0.830444,0.584039,0.501037,0.7
3,0.3748,0.578897,0.78622,0.740468,0.740428,0.741804,0.743938,0.77163,0.718165,0.860422,0.860422,0.860422,0.617044,0.593361,0.642697


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16
Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_reinit/results/checkpoint-4660
Configuration saved in /content/drive/MyDriv

TrainOutput(global_step=6990, training_loss=0.5250469879019414, metrics={'train_runtime': 310.0341, 'train_samples_per_second': 360.589, 'train_steps_per_second': 22.546, 'total_flos': 1851182116709760.0, 'train_loss': 0.5250469879019414, 'epoch': 3.0})

In [None]:
# Score trainer_l6 on the test split; the returned metrics dict is the
# cell's displayed output (keys carry the default "eval_" prefix).
trainer_l6.evaluate(eval_dataset=test_dataset)

The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: sentence, __index_level_0__.
***** Running Evaluation *****
  Num examples = 4658
  Batch size = 16


{'epoch': 3.0,
 'eval_accuracy': 0.7760841562902533,
 'eval_f1': 0.7213222095438064,
 'eval_hate_f1': 0.7204249154997586,
 'eval_hate_precision': 0.6920222634508348,
 'eval_hate_recall': 0.7512588116817724,
 'eval_loss': 0.5702793598175049,
 'eval_normal_f1': 0.5922798552472859,
 'eval_normal_precision': 0.7085137085137085,
 'eval_normal_recall': 0.5088082901554404,
 'eval_offensive_f1': 0.8512618578843745,
 'eval_offensive_precision': 0.8236924142708694,
 'eval_offensive_recall': 0.8807407407407407,
 'eval_precision': 0.7414094620784709,
 'eval_recall': 0.7136026141926511,
 'eval_runtime': 3.6227,
 'eval_samples_per_second': 1285.792,
 'eval_steps_per_second': 80.604}