In [None]:
# Install the third-party libraries used by this notebook (-qq keeps pip quiet).
# NOTE(review): versions are unpinned; the model-config logs further down this
# notebook report transformers 4.16.2 - consider pinning for reproducibility.
!pip install -qq transformers
!pip install -qq optuna
!pip install -qq sentencepiece
!pip install -qq datasets

[K     |████████████████████████████████| 3.5 MB 14.4 MB/s 
[K     |████████████████████████████████| 895 kB 53.3 MB/s 
[K     |████████████████████████████████| 596 kB 57.9 MB/s 
[K     |████████████████████████████████| 67 kB 6.1 MB/s 
[K     |████████████████████████████████| 6.8 MB 75.5 MB/s 
[K     |████████████████████████████████| 308 kB 13.2 MB/s 
[K     |████████████████████████████████| 210 kB 69.4 MB/s 
[K     |████████████████████████████████| 80 kB 10.8 MB/s 
[K     |████████████████████████████████| 75 kB 5.0 MB/s 
[K     |████████████████████████████████| 149 kB 87.0 MB/s 
[K     |████████████████████████████████| 49 kB 7.8 MB/s 
[K     |████████████████████████████████| 113 kB 83.6 MB/s 
[?25h  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 1.2 MB 14.4 MB/s 
[K     |████████████████████████████████| 311 kB 14.1 MB/s 
[K     |████████████████████████████████| 243 kB 71.4 MB/s 
[K     |██████████████

In [None]:
import transformers
import datasets
from transformers import AutoTokenizer, AutoModelForSequenceClassification,AdamW, get_linear_schedule_with_warmup,Trainer, TrainingArguments
from transformers.file_utils import is_tf_available, is_torch_available, is_torch_tpu_available
import torch
import numpy as np
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from collections import defaultdict
import random
from textwrap import wrap
from datetime import datetime
from datasets import load_from_disk
from datasets import Dataset
from sklearn.metrics import accuracy_score,classification_report, confusion_matrix
from sklearn.metrics import precision_recall_fscore_support

In [None]:
# Checkpoint to fine-tune: DistilBERT (base, uncased).
# Other text-classification checkpoints: https://huggingface.co/models?filter=text-classification
MODEL_NAME = "distilbert-base-uncased"

# Maximum sequence length for each document/sentence sample.
MAX_LENGTH = 64

# Baseline training settings referenced by the TrainingArguments cell.
BATCH_SIZE = 16
EPOCHS = 5
LEARNING_RATE = 1e-5
WEIGHT_DECAY = 1e-16
RANDOM_SEED = 5

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def set_seed(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    then sets the cuDNN flags ``deterministic=True`` and ``benchmark=False``.
    When ``seed`` is None the function returns without changing anything.

    :param seed: an integer of your choosing, or None to leave all state untouched
    """
    if seed is None:
        return

    # Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU first, then every CUDA device.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN: request deterministic behaviour and switch benchmarking off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

def compute_metrics(pred):
    """Compute overall and per-class evaluation metrics for the 3-class task.

    :param pred: prediction object exposing ``label_ids`` (true class ids) and
        ``predictions`` (per-class scores; the argmax over the last axis is
        taken as the predicted class id)
    :return: dict with ``accuracy``, macro-averaged ``f1`` / ``precision`` /
        ``recall``, and the f1 / recall / precision of class ids 0, 1 and 2
        under the ``hate_*``, ``offensive_*`` and ``normal_*`` keys respectively
    """
    # Class id -> metric-name prefix.
    class_prefixes = {0: "hate", 1: "offensive", 2: "normal"}

    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    acc = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')

    # Passing `labels` guarantees an entry for every class even when one is
    # absent from both the true and predicted ids (otherwise the lookups below
    # raise KeyError). zero_division=0 keeps the default value (0) for
    # undefined scores without emitting a warning.
    report = classification_report(
        labels,
        preds,
        labels=list(class_prefixes),
        digits=4,
        output_dict=True,
        zero_division=0,
    )

    metrics = {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall,
    }
    for class_id, prefix in class_prefixes.items():
        per_class = report[str(class_id)]
        metrics[prefix + '_f1'] = per_class["f1-score"]
        metrics[prefix + '_recall'] = per_class["recall"]
        metrics[prefix + '_precision'] = per_class["precision"]
    return metrics

def model_init():
    """Build a fresh 3-label sequence-classification model from MODEL_NAME.

    :return: the newly loaded model, moved to the module-level ``device``
    """
    return AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=3,
    ).to(device)

def timestamp():
    """Print the current local date and time, e.g. ``12-Feb-2022 (13:32:41.386000)``."""
    print(datetime.now().strftime("%d-%b-%Y (%H:%M:%S.%f)"))

In [None]:
# Seed Python, NumPy and PyTorch once, using the notebook-wide RANDOM_SEED.
set_seed(RANDOM_SEED)




In [None]:
# Load the tokenizer that belongs to the MODEL_NAME checkpoint.
# NOTE(review): it is not passed to the Trainer in the cells visible here - confirm it is still needed.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

In [None]:
# Load the dataset previously saved to Google Drive under ".../datasets/hatetwit_1".
# Later cells read its 'train' and 'validation' splits.
dataset_dfs = load_from_disk('/content/drive/MyDrive/Dissertation/datasets/hatetwit_'+str(1))


In [None]:
# Baseline training configuration; the hyperparameter search overrides the
# values it samples (learning rate, epochs, seed, warmup, weight decay,
# train batch size) on a per-trial basis.
training_args = TrainingArguments(
    output_dir='/content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results',  # output directory (checkpoints)
    num_train_epochs=EPOCHS,                 # total number of training epochs
    save_strategy="epoch",                   # save a checkpoint at the end of every epoch
    evaluation_strategy="epoch",             # evaluate at the end of every epoch
    per_device_train_batch_size=BATCH_SIZE,  # batch size per device during training
    per_device_eval_batch_size=BATCH_SIZE,   # batch size for evaluation
    weight_decay=WEIGHT_DECAY,               # strength of weight decay
    learning_rate=LEARNING_RATE,             # initial learning rate
    # The Trainer seeds itself from args.seed; without this line its own
    # default seed is used and RANDOM_SEED has no effect on training.
    seed=RANDOM_SEED,
    logging_dir='./disbert_hate/hyper/logs',  # directory for storing logs
    load_best_model_at_end=True,             # load the best model when finished training (default metric is loss)
)

Each argument is explained in the code comments. I've specified 16 as the training batch size because it is the maximum that fits in a Google Colab environment's memory.

You can also tweak other parameters, such as increasing the number of epochs for better training.

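**Note:** `load_best_model_at_end` is set to `True` in the cell above (an earlier version set it to `False` because of an issue with the save-steps value). Both `save_strategy` and `evaluation_strategy` are `"epoch"`, so a checkpoint is saved at every evaluation.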

In [None]:
# Trainer used for the hyperparameter search below. It receives a model
# factory (model_init) rather than a model instance.
hyper_trainer = Trainer(
    model_init=model_init,                         # function that returns a freshly loaded model (not an already-instantiated model)
    args=training_args,                  # training arguments, defined above
    train_dataset=dataset_dfs['train'],         # training dataset
    eval_dataset=dataset_dfs['validation'],          # evaluation dataset
    compute_metrics=compute_metrics,     # the callback that computes metrics of interest
)

loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.16.2",
  "vocab_size": 30522
}

https://hu

Downloading:   0%|          | 0.00/256M [00:00<?, ?B/s]

storing https://huggingface.co/distilbert-base-uncased/resolve/main/pytorch_model.bin in cache at /root/.cache/huggingface/transformers/9c169103d7e5a73936dd2b627e42851bec0831212b677c637033ee4bce9ab5ee.126183e36667471617ae2f0835fab707baa54b731f991507ebbb55ea85adb12a
creating metadata file for /root/.cache/huggingface/transformers/9c169103d7e5a73936dd2b627e42851bec0831212b677c637033ee4bce9ab5ee.126183e36667471617ae2f0835fab707baa54b731f991507ebbb55ea85adb12a
loading weights file https://huggingface.co/distilbert-base-uncased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/9c169103d7e5a73936dd2b627e42851bec0831212b677c637033ee4bce9ab5ee.126183e36667471617ae2f0835fab707baa54b731f991507ebbb55ea85adb12a
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.weight', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_transform.bias', 'vocab_layer_norm.bia

In [None]:
def hp_space_optuna(trial):
    """Sample one set of training hyperparameters from an Optuna trial.

    :param trial: object providing ``suggest_float``, ``suggest_int`` and
        ``suggest_categorical``
    :return: dict mapping each sampled hyperparameter name to its suggested value
    """
    search_space = {}

    # Learning rate is sampled on a log scale.
    search_space["learning_rate"] = trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True)
    search_space["num_train_epochs"] = trial.suggest_int("num_train_epochs", 2, 5)
    search_space["seed"] = trial.suggest_int("seed", 1, 40)
    search_space["warmup_steps"] = trial.suggest_int("warmup_steps", 0, 500)
    search_space["weight_decay"] = trial.suggest_float("weight_decay", 0, 0.3)

    batch_size_choices = [8, 16, 32, 64]
    search_space["per_device_train_batch_size"] = trial.suggest_categorical(
        "per_device_train_batch_size", batch_size_choices
    )
    return search_space

In [None]:
# Run the Optuna search over hp_space_optuna and keep the best trial.
# The objective is set explicitly to the `f1` value returned by compute_metrics
# (reported by the Trainer as "eval_f1"). When compute_objective is omitted and
# compute_metrics returns metrics, the default objective is the sum of all
# returned metrics, which is not a meaningful score to maximise.
best_trial = hyper_trainer.hyperparameter_search(
    n_trials=40,
    direction="maximize",
    backend="optuna",
    hp_space=hp_space_optuna,
    compute_objective=lambda metrics: metrics["eval_f1"],
)

[32m[I 2022-02-12 13:32:41,386][0m A new study created in memory with name: no-name-60724ab9-d73e-4387-8c41-db9ac9239eee[0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout":

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.655,0.595284,0.761322,0.75703,0.756396,0.761322
2,0.5749,0.561695,0.772269,0.770109,0.770437,0.772269
3,0.5446,0.5529,0.775274,0.771375,0.770792,0.775274


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6783    0.7254    0.7010       994
           1     0.8235    0.8601    0.8414      2701
           2     0.6490    0.5218    0.5785       964

    accuracy                         0.7613      4659
   macro avg     0.7169    0.7024    0.7070      4659
weighted avg     0.7564    0.7613    0.7570      4659

0.7570295544860335


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-0/checkpoint-1165
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-0/checkpoint-1165/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-0/checkpoint-1165/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6781    0.7565    0.7152       994
           1     0.8478    0.8560    0.8519      2701
           2     0.6488    0.5539    0.5976       964

    accuracy                         0.7723      4659
   macro avg     0.7249    0.7222    0.7216      4659
weighted avg     0.7704    0.7723    0.7701      4659

0.7701089650230784


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-0/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-0/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-0/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6907    0.7435    0.7161       994
           1     0.8402    0.8719    0.8557      2701
           2     0.6590    0.5373    0.5920       964

    accuracy                         0.7753      4659
   macro avg     0.7300    0.7176    0.7213      4659
weighted avg     0.7708    0.7753    0.7714      4659

0.771374534118344


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-0/checkpoint-3495
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-0/checkpoint-3495/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-0/checkpoint-3495/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-0/checkpoint-3495 (score: 0.5529000163078308).
[32m[I 2022-02-12 13:37:00,939][0m Trial 0 finished with value: 3.0927140278755667 and parameters: {'learning_rate': 5.163945255711368e-06, 'num_train_epochs': 3, 'seed': 33, 'warmup_steps': 172, 'weight_decay': 0.1163929664963968, 'per_device_train_batch_size': 32}. Best is trial 0 with value: 3.0927140278755667.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-unc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5929,0.558939,0.770337,0.772311,0.777818,0.770337
2,0.4787,0.553991,0.796737,0.794592,0.793188,0.796737
3,0.3589,0.634564,0.802533,0.802054,0.801775,0.802533


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6457    0.7757    0.7048       994
           1     0.8732    0.8238    0.8478      2701
           2     0.6467    0.6151    0.6305       964

    accuracy                         0.7703      4659
   macro avg     0.7219    0.7382    0.7277      4659
weighted avg     0.7778    0.7703    0.7723      4659

0.7723110129090739


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-1/checkpoint-4659
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-1/checkpoint-4659/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-1/checkpoint-4659/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7487    0.7404    0.7446       994
           1     0.8570    0.8852    0.8709      2701
           2     0.6603    0.6068    0.6324       964

    accuracy                         0.7967      4659
   macro avg     0.7553    0.7442    0.7493      4659
weighted avg     0.7932    0.7967    0.7946      4659

0.7945923621587604


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-1/checkpoint-9318
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-1/checkpoint-9318/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-1/checkpoint-9318/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7565    0.7847    0.7704       994
           1     0.8729    0.8723    0.8726      2701
           2     0.6491    0.6255    0.6371       964

    accuracy                         0.8025      4659
   macro avg     0.7595    0.7608    0.7600      4659
weighted avg     0.8018    0.8025    0.8021      4659

0.8020540267503339


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-1/checkpoint-13977
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-1/checkpoint-13977/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-1/checkpoint-13977/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-1/checkpoint-9318 (score: 0.5539913773536682).
[32m[I 2022-02-12 13:45:19,021][0m Trial 1 finished with value: 3.208894886927413 and parameters: {'learning_rate': 7.113930006942247e-05, 'num_train_epochs': 3, 'seed': 21, 'warmup_steps': 499, 'weight_decay': 0.005885721438242597, 'per_device_train_batch_size': 8}. Best is trial 1 with value: 3.208894886927413.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-u

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7624,0.669116,0.71818,0.706584,0.704843,0.71818
2,0.6636,0.617361,0.746941,0.742998,0.741183,0.746941
3,0.6215,0.601205,0.753381,0.748042,0.747657,0.753381
4,0.5906,0.595284,0.7566,0.751303,0.750373,0.7566


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6432    0.6258    0.6344       994
           1     0.7733    0.8663    0.8172      2701
           2     0.5766    0.3983    0.4712       964

    accuracy                         0.7182      4659
   macro avg     0.6644    0.6301    0.6409      4659
weighted avg     0.7048    0.7182    0.7066      4659

0.7065841685395443


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-1165
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-1165/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-1165/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6657    0.6871    0.6762       994
           1     0.8176    0.8530    0.8349      2701
           2     0.6049    0.5114    0.5542       964

    accuracy                         0.7469      4659
   macro avg     0.6961    0.6839    0.6885      4659
weighted avg     0.7412    0.7469    0.7430      4659

0.7429980128234656


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6648    0.7203    0.6915       994
           1     0.8171    0.8586    0.8373      2701
           2     0.6384    0.4927    0.5562       964

    accuracy                         0.7534      4659
   macro avg     0.7068    0.6905    0.6950      4659
weighted avg     0.7477    0.7534    0.7480      4659

0.7480424744087703


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-3495
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-3495/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-3495/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6739    0.7193    0.6959       994
           1     0.8202    0.8630    0.8411      2701
           2     0.6336    0.4969    0.5570       964

    accuracy                         0.7566      4659
   macro avg     0.7092    0.6931    0.6980      4659
weighted avg     0.7504    0.7566    0.7513      4659

0.7513027172473458


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-4660
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-4660/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-4660/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-2/checkpoint-4660 (score: 0.5952842235565186).
[32m[I 2022-02-12 13:51:05,263][0m Trial 2 finished with value: 3.0148763973087584 and parameters: {'learning_rate': 1.9885527540532752e-06, 'num_train_epochs': 4, 'seed': 7, 'warmup_steps': 398, 'weight_decay': 0.2021313515600552, 'per_device_train_batch_size': 32}. Best is trial 1 with value: 3.208894886927413.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-unca

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7942,0.593869,0.761966,0.755312,0.757017,0.761966
2,0.5698,0.544258,0.778064,0.776214,0.775259,0.778064
3,0.5258,0.535419,0.782357,0.77821,0.77807,0.782357


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6646    0.7555    0.7072       994
           1     0.8250    0.8675    0.8457      2701
           2     0.6618    0.4730    0.5517       964

    accuracy                         0.7620      4659
   macro avg     0.7171    0.6987    0.7015      4659
weighted avg     0.7570    0.7620    0.7553      4659

0.7553122101801825


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-3/checkpoint-583
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-3/checkpoint-583/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-3/checkpoint-583/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7064    0.7384    0.7221       994
           1     0.8484    0.8638    0.8560      2701
           2     0.6414    0.5788    0.6085       964

    accuracy                         0.7781      4659
   macro avg     0.7321    0.7270    0.7289      4659
weighted avg     0.7753    0.7781    0.7762      4659

0.7762140175722042


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-3/checkpoint-1166
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-3/checkpoint-1166/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-3/checkpoint-1166/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7035    0.7686    0.7346       994
           1     0.8433    0.8745    0.8586      2701
           2     0.6723    0.5384    0.5979       964

    accuracy                         0.7824      4659
   macro avg     0.7397    0.7272    0.7304      4659
weighted avg     0.7781    0.7824    0.7782      4659

0.7782096523445967


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-3/checkpoint-1749
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-3/checkpoint-1749/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-3/checkpoint-1749/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-3/checkpoint-1749 (score: 0.5354192852973938).
[32m[I 2022-02-12 13:54:55,504][0m Trial 3 finished with value: 3.1209932494383255 and parameters: {'learning_rate': 1.112960505325453e-05, 'num_train_epochs': 3, 'seed': 29, 'warmup_steps': 356, 'weight_decay': 0.28109091759167143, 'per_device_train_batch_size': 64}. Best is trial 1 with value: 3.208894886927413.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-unc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5591,0.531514,0.781284,0.780291,0.779593,0.781284
2,0.432,0.540284,0.804679,0.80299,0.802461,0.804679


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7286    0.6942    0.7110       994
           1     0.8503    0.8682    0.8591      2701
           2     0.6342    0.6276    0.6309       964

    accuracy                         0.7813      4659
   macro avg     0.7377    0.7300    0.7337      4659
weighted avg     0.7796    0.7813    0.7803      4659

0.7802912548292993


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-4/checkpoint-4659
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-4/checkpoint-4659/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-4/checkpoint-4659/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7443    0.7938    0.7683       994
           1     0.8672    0.8778    0.8725      2701
           2     0.6809    0.6110    0.6441       964

    accuracy                         0.8047      4659
   macro avg     0.7642    0.7609    0.7616      4659
weighted avg     0.8025    0.8047    0.8030      4659

0.802990499424248


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-4/checkpoint-9318
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-4/checkpoint-9318/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-4/checkpoint-9318/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-4/checkpoint-4659 (score: 0.5315142869949341).
[32m[I 2022-02-12 14:00:30,973][0m Trial 4 finished with value: 3.2148099108683876 and parameters: {'learning_rate': 4.202148090930707e-05, 'num_train_epochs': 2, 'seed': 38, 'warmup_steps': 152, 'weight_decay': 0.14589392232755702, 'per_device_train_batch_size': 8}. Best is trial 4 with value: 3.2148099108683876.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-unc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5535,0.529251,0.776991,0.780073,0.785671,0.776991
2,0.4414,0.495635,0.805538,0.801923,0.800715,0.805538
3,0.298,0.539576,0.80146,0.800795,0.801043,0.80146


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6771    0.7636    0.7177       994
           1     0.8833    0.8238    0.8525      2701
           2     0.6241    0.6598    0.6415       964

    accuracy                         0.7770      4659
   macro avg     0.7282    0.7490    0.7372      4659
weighted avg     0.7857    0.7770    0.7801      4659

0.7800731492470574


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-5/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-5/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-5/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7581    0.7726    0.7653       994
           1     0.8555    0.8963    0.8754      2701
           2     0.6912    0.5851    0.6337       964

    accuracy                         0.8055      4659
   macro avg     0.7683    0.7513    0.7582      4659
weighted avg     0.8007    0.8055    0.8019      4659

0.8019228660208497


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-5/checkpoint-4660
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-5/checkpoint-4660/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-5/checkpoint-4660/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7384    0.8038    0.7697       994
           1     0.8771    0.8689    0.8730      2701
           2     0.6526    0.6100    0.6306       964

    accuracy                         0.8015      4659
   macro avg     0.7560    0.7609    0.7578      4659
weighted avg     0.8010    0.8015    0.8008      4659

0.8007954441915375


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-5/checkpoint-6990
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-5/checkpoint-6990/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-5/checkpoint-6990/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-5/checkpoint-4660 (score: 0.49563461542129517).
[32m[I 2022-02-12 14:05:58,861][0m Trial 5 finished with value: 3.204757096036416 and parameters: {'learning_rate': 3.099273387921209e-05, 'num_train_epochs': 3, 'seed': 21, 'warmup_steps': 277, 'weight_decay': 0.2566556426664683, 'per_device_train_batch_size': 16}. Best is trial 4 with value: 3.2148099108683876.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-unc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.729,0.590566,0.76261,0.754862,0.755621,0.76261


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6813    0.7183    0.6993       994
           1     0.8133    0.8808    0.8457      2701
           2     0.6706    0.4772    0.5576       964

    accuracy                         0.7626      4659
   macro avg     0.7217    0.6921    0.7009      4659
weighted avg     0.7556    0.7626    0.7549      4659

0.7548618403201093


[32m[I 2022-02-12 14:07:14,190][0m Trial 6 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tra

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6009,0.558845,0.779566,0.774559,0.773395,0.779566
2,0.5124,0.533036,0.784503,0.778503,0.78124,0.784503


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7316    0.6911    0.7108       994
           1     0.8234    0.8893    0.8551      2701
           2     0.6762    0.5633    0.6146       964

    accuracy                         0.7796      4659
   macro avg     0.7438    0.7146    0.7268      4659
weighted avg     0.7734    0.7796    0.7746      4659

0.7745591309632635


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-7/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-7/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-7/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6877    0.7797    0.7308       994
           1     0.8409    0.8845    0.8621      2701
           2     0.7106    0.5093    0.5934       964

    accuracy                         0.7845      4659
   macro avg     0.7464    0.7245    0.7288      4659
weighted avg     0.7812    0.7845    0.7785      4659

0.7785029696022018


[32m[I 2022-02-12 14:10:52,499][0m Trial 7 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tra

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6981,0.544126,0.778279,0.771479,0.771929,0.778279
2,0.4993,0.496555,0.797167,0.798736,0.800924,0.797167
3,0.3984,0.510567,0.80425,0.802714,0.802306,0.80425


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7317    0.6831    0.7066       994
           1     0.8116    0.8982    0.8527      2701
           2     0.7022    0.5405    0.6108       964

    accuracy                         0.7783      4659
   macro avg     0.7485    0.7072    0.7234      4659
weighted avg     0.7719    0.7783    0.7715      4659

0.7714791277987585


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-8/checkpoint-583
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-8/checkpoint-583/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-8/checkpoint-583/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7356    0.7726    0.7537       994
           1     0.8823    0.8523    0.8670      2701
           2     0.6402    0.6680    0.6538       964

    accuracy                         0.7972      4659
   macro avg     0.7527    0.7643    0.7582      4659
weighted avg     0.8009    0.7972    0.7987      4659

0.7987362183442277


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-8/checkpoint-1166
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-8/checkpoint-1166/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-8/checkpoint-1166/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7385    0.7897    0.7632       994
           1     0.8691    0.8778    0.8735      2701
           2     0.6809    0.6131    0.6452       964

    accuracy                         0.8042      4659
   macro avg     0.7628    0.7602    0.7606      4659
weighted avg     0.8023    0.8042    0.8027      4659

0.802714256498369


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-8/checkpoint-1749
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-8/checkpoint-1749/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-8/checkpoint-1749/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-8/checkpoint-1166 (score: 0.4965546727180481).
[32m[I 2022-02-12 14:14:43,481][0m Trial 8 finished with value: 3.2135197015062835 and parameters: {'learning_rate': 3.8013548517568867e-05, 'num_train_epochs': 3, 'seed': 39, 'warmup_steps': 274, 'weight_decay': 0.038291400492122485, 'per_device_train_batch_size': 64}. Best is trial 4 with value: 3.2148099108683876.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5632,0.542018,0.770766,0.772611,0.780993,0.770766
2,0.4305,0.512798,0.804464,0.800182,0.800715,0.804464
3,0.2688,0.552453,0.812835,0.811793,0.811225,0.812835


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6329    0.8099    0.7105       994
           1     0.8804    0.8204    0.8494      2701
           2     0.6552    0.5913    0.6216       964

    accuracy                         0.7708      4659
   macro avg     0.7228    0.7405    0.7272      4659
weighted avg     0.7810    0.7708    0.7726      4659

0.7726114441656149


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-9/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-9/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-9/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7307    0.7998    0.7637       994
           1     0.8574    0.8926    0.8747      2701
           2     0.7141    0.5622    0.6291       964

    accuracy                         0.8045      4659
   macro avg     0.7674    0.7516    0.7558      4659
weighted avg     0.8007    0.8045    0.8002      4659

0.8001823063059155


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-9/checkpoint-4660
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-9/checkpoint-4660/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-9/checkpoint-4660/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7699    0.8078    0.7884       994
           1     0.8787    0.8823    0.8805      2701
           2     0.6648    0.6234    0.6435       964

    accuracy                         0.8128      4659
   macro avg     0.7711    0.7712    0.7708      4659
weighted avg     0.8112    0.8128    0.8118      4659

0.811793371147101


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-9/checkpoint-6990
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-9/checkpoint-6990/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-9/checkpoint-6990/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-9/checkpoint-4660 (score: 0.5127978324890137).
[32m[I 2022-02-12 14:20:12,090][0m Trial 9 finished with value: 3.2486894835841547 and parameters: {'learning_rate': 6.579941859647633e-05, 'num_train_epochs': 3, 'seed': 22, 'warmup_steps': 464, 'weight_decay': 0.2894897525763174, 'per_device_train_batch_size': 16}. Best is trial 9 with value: 3.2486894835841547.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-unc

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5758,0.574004,0.769908,0.769804,0.773055,0.769908


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6556    0.7777    0.7115       994
           1     0.8595    0.8360    0.8476      2701
           2     0.6518    0.5768    0.6120       964

    accuracy                         0.7699      4659
   macro avg     0.7223    0.7301    0.7237      4659
weighted avg     0.7731    0.7699    0.7698      4659

0.7698040456657698


[32m[I 2022-02-12 14:21:59,972][0m Trial 10 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5639,0.533038,0.776776,0.77717,0.779199,0.776776
2,0.425,0.531701,0.80146,0.7997,0.799411,0.80146


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6812    0.7716    0.7236       994
           1     0.8667    0.8427    0.8545      2701
           2     0.6351    0.5975    0.6157       964

    accuracy                         0.7768      4659
   macro avg     0.7277    0.7373    0.7313      4659
weighted avg     0.7792    0.7768    0.7772      4659

0.777170108224493


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-11/checkpoint-4659
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-11/checkpoint-4659/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-11/checkpoint-4659/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7295    0.7867    0.7570       994
           1     0.8668    0.8771    0.8719      2701
           2     0.6827    0.6048    0.6414       964

    accuracy                         0.8015      4659
   macro avg     0.7597    0.7562    0.7568      4659
weighted avg     0.7994    0.8015    0.7997      4659

0.7996996413986973


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-11/checkpoint-9318
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-11/checkpoint-9318/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-11/checkpoint-9318/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-11/checkpoint-9318 (score: 0.5317009687423706).
[32m[I 2022-02-12 14:27:36,513][0m Trial 11 finished with value: 3.202029842354337 and parameters: {'learning_rate': 2.8241349884339874e-05, 'num_train_epochs': 2, 'seed': 35, 'warmup_steps': 150, 'weight_decay': 0.09097200032265645, 'per_device_train_batch_size': 8}. Best is trial 9 with value: 3.2486894835841547.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-bas

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5227,0.557774,0.765186,0.762487,0.771341,0.765186


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6116    0.7938    0.6909       994
           1     0.8579    0.8493    0.8536      2701
           2     0.6935    0.5000    0.5811       964

    accuracy                         0.7652      4659
   macro avg     0.7210    0.7144    0.7085      4659
weighted avg     0.7713    0.7652    0.7625      4659

0.7624866858812082


[32m[I 2022-02-12 14:30:23,411][0m Trial 12 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5576,0.537427,0.784288,0.776393,0.778494,0.784288
2,0.4551,0.495857,0.790084,0.792429,0.796479,0.790084


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7340    0.7022    0.7177       994
           1     0.8135    0.9078    0.8581      2701
           2     0.7262    0.5228    0.6080       964

    accuracy                         0.7843      4659
   macro avg     0.7579    0.7109    0.7279      4659
weighted avg     0.7785    0.7843    0.7764      4659

0.7763929250503012


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-13/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-13/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-13/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7166    0.7887    0.7510       994
           1     0.8868    0.8378    0.8616      2701
           2     0.6259    0.6577    0.6414       964

    accuracy                         0.7901      4659
   macro avg     0.7431    0.7614    0.7513      4659
weighted avg     0.7965    0.7901    0.7924      4659

0.7924292370066351


[32m[I 2022-02-12 14:34:02,478][0m Trial 13 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.613,0.590818,0.768835,0.766465,0.767431,0.768835


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6628    0.7475    0.7026       994
           1     0.8426    0.8545    0.8485      2701
           2     0.6646    0.5508    0.6024       964

    accuracy                         0.7688      4659
   macro avg     0.7233    0.7176    0.7178      4659
weighted avg     0.7674    0.7688    0.7665      4659

0.7664647673624138


[32m[I 2022-02-12 14:36:46,012][0m Trial 14 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5656,0.524527,0.786435,0.780204,0.782134,0.786435
2,0.4445,0.514944,0.799528,0.796322,0.796884,0.799528


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7009    0.7545    0.7267       994
           1     0.8340    0.8926    0.8623      2701
           2     0.7206    0.5218    0.6053       964

    accuracy                         0.7864      4659
   macro avg     0.7518    0.7230    0.7314      4659
weighted avg     0.7821    0.7864    0.7802      4659

0.780203717564679


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-15/checkpoint-4659
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-15/checkpoint-4659/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-15/checkpoint-4659/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7180    0.7968    0.7554       994
           1     0.8592    0.8812    0.8700      2701
           2     0.7036    0.5737    0.6320       964

    accuracy                         0.7995      4659
   macro avg     0.7603    0.7505    0.7525      4659
weighted avg     0.7969    0.7995    0.7963      4659

0.7963219934859517


[32m[I 2022-02-12 14:42:17,138][0m Trial 15 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7452,0.698688,0.704014,0.693615,0.689865,0.704014


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6378    0.6378    0.6378       994
           1     0.7699    0.8449    0.8056      2701
           2     0.5193    0.3776    0.4372       964

    accuracy                         0.7040      4659
   macro avg     0.6423    0.6201    0.6269      4659
weighted avg     0.6899    0.7040    0.6936      4659

0.6936153444921317


[32m[I 2022-02-12 14:44:04,332][0m Trial 16 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5786,0.525789,0.784503,0.7788,0.778129,0.784503
2,0.4451,0.497069,0.797811,0.797332,0.798636,0.797811


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7236    0.7163    0.7199       994
           1     0.8298    0.8956    0.8615      2701
           2     0.6895    0.5436    0.6079       964

    accuracy                         0.7845      4659
   macro avg     0.7476    0.7185    0.7298      4659
weighted avg     0.7781    0.7845    0.7788      4659

0.7787995796566424


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-17/checkpoint-1165
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-17/checkpoint-1165/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-17/checkpoint-1165/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7189    0.8129    0.7630       994
           1     0.8776    0.8604    0.8689      2701
           2     0.6595    0.6068    0.6321       964

    accuracy                         0.7978      4659
   macro avg     0.7520    0.7600    0.7547      4659
weighted avg     0.7986    0.7978    0.7973      4659

0.7973317548047276


[32m[I 2022-02-12 14:46:55,531][0m Trial 17 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6259,0.60034,0.758103,0.754806,0.754185,0.758103


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6691    0.7264    0.6966       994
           1     0.8293    0.8526    0.8408      2701
           2     0.6314    0.5259    0.5739       964

    accuracy                         0.7581      4659
   macro avg     0.7099    0.7016    0.7037      4659
weighted avg     0.7542    0.7581    0.7548      4659

0.7548056945979844


[32m[I 2022-02-12 14:48:42,963][0m Trial 18 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.556,0.540625,0.786864,0.784192,0.784337,0.786864
2,0.4612,0.555088,0.796952,0.790412,0.792449,0.796952


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6994    0.7656    0.7310       994
           1     0.8509    0.8708    0.8608      2701
           2     0.6853    0.5737    0.6245       964

    accuracy                         0.7869      4659
   macro avg     0.7452    0.7367    0.7388      4659
weighted avg     0.7843    0.7869    0.7842      4659

0.7841922674162086


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-19/checkpoint-4659
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-19/checkpoint-4659/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-19/checkpoint-4659/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7245    0.7857    0.7539       994
           1     0.8407    0.8989    0.8688      2701
           2     0.7273    0.5228    0.6083       964

    accuracy                         0.7970      4659
   macro avg     0.7642    0.7358    0.7437      4659
weighted avg     0.7924    0.7970    0.7904      4659

0.790411867865357


[32m[I 2022-02-12 14:54:15,260][0m Trial 19 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5674,0.517422,0.788367,0.782804,0.783161,0.788367
2,0.4154,0.491683,0.805967,0.803451,0.803904,0.805967


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7211    0.7596    0.7398       994
           1     0.8346    0.8893    0.8611      2701
           2     0.7030    0.5353    0.6078       964

    accuracy                         0.7884      4659
   macro avg     0.7529    0.7280    0.7362      4659
weighted avg     0.7832    0.7884    0.7828      4659

0.7828036654442388


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-20/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-20/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-20/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7300    0.8189    0.7719       994
           1     0.8710    0.8800    0.8755      2701
           2     0.6920    0.5851    0.6341       964

    accuracy                         0.8060      4659
   macro avg     0.7644    0.7613    0.7605      4659
weighted avg     0.8039    0.8060    0.8035      4659

0.8034511546265247


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-20/checkpoint-4660
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-20/checkpoint-4660/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-20/checkpoint-4660/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-20/checkpoint-4660 (score: 0.49168333411216736).
[32m[I 2022-02-12 14:57:56,564][0m Trial 20 finished with value: 3.2192888472141696 and parameters: {'learning_rate': 4.591455580099434e-05, 'num_train_epochs': 2, 'seed': 25, 'warmup_steps': 225, 'weight_decay': 0.1661852327680115, 'per_device_train_batch_size': 16}. Best is trial 9 with value: 3.2486894835841547.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-ba

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5419,0.523503,0.784288,0.779088,0.778618,0.784288
2,0.4187,0.494062,0.799313,0.796393,0.795709,0.799313


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7204    0.7233    0.7219       994
           1     0.8295    0.8900    0.8587      2701
           2     0.6959    0.5508    0.6149       964

    accuracy                         0.7843      4659
   macro avg     0.7486    0.7214    0.7319      4659
weighted avg     0.7786    0.7843    0.7791      4659

0.7790884791850295


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-21/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-21/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-21/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7358    0.7817    0.7580       994
           1     0.8563    0.8823    0.8691      2701
           2     0.6878    0.5851    0.6323       964

    accuracy                         0.7993      4659
   macro avg     0.7600    0.7497    0.7531      4659
weighted avg     0.7957    0.7993    0.7964      4659

0.7963926016439064


[32m[I 2022-02-12 15:01:36,014][0m Trial 21 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5631,0.523726,0.786006,0.777954,0.779057,0.786006
2,0.4146,0.491583,0.802962,0.802376,0.802794,0.802962


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7468    0.7093    0.7276       994
           1     0.8172    0.9104    0.8613      2701
           2     0.7054    0.5166    0.5964       964

    accuracy                         0.7860      4659
   macro avg     0.7565    0.7121    0.7284      4659
weighted avg     0.7791    0.7860    0.7780      4659

0.7779535955678275


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-22/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-22/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-22/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7311    0.7988    0.7635       994
           1     0.8786    0.8704    0.8745      2701
           2     0.6644    0.6183    0.6405       964

    accuracy                         0.8030      4659
   macro avg     0.7580    0.7625    0.7595      4659
weighted avg     0.8028    0.8030    0.8024      4659

0.8023758031128712


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-22/checkpoint-4660
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-22/checkpoint-4660/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-22/checkpoint-4660/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-22/checkpoint-4660 (score: 0.49158304929733276).
[32m[I 2022-02-12 15:05:14,860][0m Trial 22 finished with value: 3.2110939142827677 and parameters: {'learning_rate': 5.736610942081283e-05, 'num_train_epochs': 2, 'seed': 29, 'warmup_steps': 323, 'weight_decay': 0.21003181844745372, 'per_device_train_batch_size': 16}. Best is trial 9 with value: 3.2486894835841547.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-b

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5679,0.52735,0.78944,0.785098,0.783995,0.78944
2,0.4211,0.499829,0.800816,0.801204,0.802821,0.800816
3,0.3143,0.543686,0.80146,0.800922,0.801504,0.80146


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7342    0.7223    0.7282       994
           1     0.8361    0.8912    0.8627      2701
           2     0.6895    0.5737    0.6263       964

    accuracy                         0.7894      4659
   macro avg     0.7532    0.7290    0.7391      4659
weighted avg     0.7840    0.7894    0.7851      4659

0.7850979437333856


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-23/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-23/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-23/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7206    0.8018    0.7590       994
           1     0.8840    0.8604    0.8720      2701
           2     0.6602    0.6328    0.6462       964

    accuracy                         0.8008      4659
   macro avg     0.7549    0.7650    0.7591      4659
weighted avg     0.8028    0.8008    0.8012      4659

0.8012042676883507


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-23/checkpoint-4660
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-23/checkpoint-4660/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-23/checkpoint-4660/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7294    0.8028    0.7644       994
           1     0.8804    0.8693    0.8748      2701
           2     0.6548    0.6100    0.6316       964

    accuracy                         0.8015      4659
   macro avg     0.7549    0.7607    0.7569      4659
weighted avg     0.8015    0.8015    0.8009      4659

0.8009219891701026


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-23/checkpoint-6990
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-23/checkpoint-6990/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-23/checkpoint-6990/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-23/checkpoint-4660 (score: 0.4998287558555603).
[32m[I 2022-02-12 15:10:42,532][0m Trial 23 finished with value: 3.205344819549575 and parameters: {'learning_rate': 2.7213255631668047e-05, 'num_train_epochs': 3, 'seed': 18, 'warmup_steps': 233, 'weight_decay': 0.2986372926158696, 'per_device_train_batch_size': 16}. Best is trial 9 with value: 3.2486894835841547.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-bas

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5895,0.526113,0.788796,0.783521,0.782786,0.788796
2,0.4195,0.512061,0.795879,0.793904,0.793803,0.795879


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7375    0.7264    0.7319       994
           1     0.8315    0.8952    0.8622      2701
           2     0.6930    0.5550    0.6164       964

    accuracy                         0.7888      4659
   macro avg     0.7540    0.7255    0.7368      4659
weighted avg     0.7828    0.7888    0.7835      4659

0.783520794657633


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-24/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-24/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-24/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7220    0.7918    0.7553       994
           1     0.8639    0.8719    0.8679      2701
           2     0.6714    0.5871    0.6265       964

    accuracy                         0.7959      4659
   macro avg     0.7524    0.7503    0.7499      4659
weighted avg     0.7938    0.7959    0.7939      4659

0.7939039479190795


[32m[I 2022-02-12 15:14:18,078][0m Trial 24 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5759,0.541958,0.784718,0.785323,0.786025,0.784718
2,0.4103,0.522692,0.801245,0.798572,0.798943,0.801245


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7164    0.7344    0.7253       994
           1     0.8665    0.8552    0.8608      2701
           2     0.6324    0.6390    0.6357       964

    accuracy                         0.7847      4659
   macro avg     0.7384    0.7429    0.7406      4659
weighted avg     0.7860    0.7847    0.7853      4659

0.7853229612810536


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-25/checkpoint-4659
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-25/checkpoint-4659/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-25/checkpoint-4659/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7201    0.7998    0.7579       994
           1     0.8650    0.8800    0.8725      2701
           2     0.6952    0.5820    0.6335       964

    accuracy                         0.8012      4659
   macro avg     0.7601    0.7539    0.7546      4659
weighted avg     0.7989    0.8012    0.7986      4659

0.79857216213819


[32m[I 2022-02-12 15:19:47,485][0m Trial 25 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6416,0.519838,0.78622,0.781371,0.78306,0.78622
2,0.4702,0.490654,0.801674,0.800365,0.800011,0.801674


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6955    0.7837    0.7370       994
           1     0.8439    0.8786    0.8609      2701
           2     0.7029    0.5301    0.6044       964

    accuracy                         0.7862      4659
   macro avg     0.7474    0.7308    0.7341      4659
weighted avg     0.7831    0.7862    0.7814      4659

0.7813714492011276


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-26/checkpoint-583
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-26/checkpoint-583/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-26/checkpoint-583/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7357    0.7867    0.7603       994
           1     0.8694    0.8749    0.8721      2701
           2     0.6720    0.6120    0.6406       964

    accuracy                         0.8017      4659
   macro avg     0.7590    0.7579    0.7577      4659
weighted avg     0.8000    0.8017    0.8004      4659

0.8003652606156981


[32m[I 2022-02-12 15:22:18,825][0m Trial 26 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.594,0.552673,0.777849,0.772213,0.771763,0.777849


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7112    0.7062    0.7087       994
           1     0.8230    0.8886    0.8545      2701
           2     0.6905    0.5415    0.6070       964

    accuracy                         0.7778      4659
   macro avg     0.7416    0.7121    0.7234      4659
weighted avg     0.7718    0.7778    0.7722      4659

0.7722133853687699


[32m[I 2022-02-12 15:23:43,195][0m Trial 27 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5584,0.511172,0.793089,0.790092,0.790592,0.793089
2,0.4249,0.508188,0.794376,0.792376,0.799604,0.794376


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7053    0.7827    0.7420       994
           1     0.8567    0.8767    0.8666      2701
           2     0.6932    0.5695    0.6253       964

    accuracy                         0.7931      4659
   macro avg     0.7518    0.7430    0.7446      4659
weighted avg     0.7906    0.7931    0.7901      4659

0.7900915932736149


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-28/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-28/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-28/checkpoint-2330/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6669    0.8561    0.7498       994
           1     0.8796    0.8571    0.8682      2701
           2     0.7124    0.5550    0.6239       964

    accuracy                         0.7944      4659
   macro avg     0.7530    0.7561    0.7473      4659
weighted avg     0.7996    0.7944    0.7924      4659

0.7923758973857472


[32m[I 2022-02-12 15:27:18,739][0m Trial 28 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.627,0.580313,0.766044,0.760567,0.759322,0.766044


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6902    0.6992    0.6947       994
           1     0.8222    0.8782    0.8493      2701
           2     0.6545    0.5207    0.5800       964

    accuracy                         0.7660      4659
   macro avg     0.7223    0.6994    0.7080      4659
weighted avg     0.7593    0.7660    0.7606      4659

0.7605673796199659


[32m[I 2022-02-12 15:28:43,390][0m Trial 29 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.61,0.591354,0.764112,0.759556,0.758956,0.764112


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6804    0.7304    0.7045       994
           1     0.8268    0.8645    0.8452      2701
           2     0.6497    0.5176    0.5762       964

    accuracy                         0.7641      4659
   macro avg     0.7190    0.7042    0.7087      4659
weighted avg     0.7590    0.7641    0.7596      4659

0.7595559809272101


[32m[I 2022-02-12 15:31:27,322][0m Trial 30 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7004,0.539908,0.780854,0.775506,0.774802,0.780854
2,0.4968,0.491065,0.800816,0.797448,0.797026,0.800816


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7218    0.6942    0.7077       994
           1     0.8236    0.8919    0.8564      2701
           2     0.6928    0.5591    0.6188       964

    accuracy                         0.7809      4659
   macro avg     0.7460    0.7151    0.7276      4659
weighted avg     0.7748    0.7809    0.7755      4659

0.7755060233337965


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-31/checkpoint-583
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-31/checkpoint-583/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-31/checkpoint-583/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7335    0.7837    0.7578       994
           1     0.8559    0.8863    0.8709      2701
           2     0.6975    0.5788    0.6327       964

    accuracy                         0.8008      4659
   macro avg     0.7623    0.7496    0.7538      4659
weighted avg     0.7970    0.8008    0.7974      4659

0.7974482887443389


[32m[I 2022-02-12 15:33:58,647][0m Trial 31 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6989,0.55888,0.770122,0.768219,0.771498,0.770122


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7580    0.5956    0.6670       994
           1     0.8414    0.8778    0.8592      2701
           2     0.5896    0.6483    0.6176       964

    accuracy                         0.7701      4659
   macro avg     0.7297    0.7072    0.7146      4659
weighted avg     0.7715    0.7701    0.7682      4659

0.7682188740339928


[32m[I 2022-02-12 15:35:13,667][0m Trial 32 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.7205,0.55151,0.774844,0.769574,0.767744,0.774844


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7226    0.6841    0.7028       994
           1     0.8270    0.8904    0.8576      2701
           2     0.6481    0.5446    0.5919       964

    accuracy                         0.7748      4659
   macro avg     0.7326    0.7064    0.7174      4659
weighted avg     0.7677    0.7748    0.7696      4659

0.7695736922053926


[32m[I 2022-02-12 15:36:28,814][0m Trial 33 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.726,0.580225,0.761966,0.761854,0.775023,0.761966


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.5974    0.8209    0.6915       994
           1     0.8661    0.8238    0.8444      2701
           2     0.7030    0.5280    0.6031       964

    accuracy                         0.7620      4659
   macro avg     0.7222    0.7242    0.7130      4659
weighted avg     0.7750    0.7620    0.7619      4659

0.7618537217680869


[32m[I 2022-02-12 15:37:44,531][0m Trial 34 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6697,0.536138,0.779566,0.778558,0.779357,0.779566
2,0.4697,0.481131,0.806825,0.806504,0.807258,0.806825
3,0.3482,0.513604,0.808113,0.80721,0.807051,0.808113


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7644    0.6660    0.7118       994
           1     0.8437    0.8712    0.8572      2701
           2     0.6145    0.6400    0.6270       964

    accuracy                         0.7796      4659
   macro avg     0.7409    0.7257    0.7320      4659
weighted avg     0.7794    0.7796    0.7786      4659

0.7785575795769097


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-35/checkpoint-583
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-35/checkpoint-583/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-35/checkpoint-583/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7308    0.8028    0.7651       994
           1     0.8811    0.8697    0.8753      2701
           2     0.6792    0.6349    0.6563       964

    accuracy                         0.8068      4659
   macro avg     0.7637    0.7691    0.7656      4659
weighted avg     0.8073    0.8068    0.8065      4659

0.806504000710417


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-35/checkpoint-1166
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-35/checkpoint-1166/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-35/checkpoint-1166/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7542    0.8089    0.7806       994
           1     0.8761    0.8741    0.8751      2701
           2     0.6682    0.6224    0.6445       964

    accuracy                         0.8081      4659
   macro avg     0.7661    0.7685    0.7667      4659
weighted avg     0.8071    0.8081    0.8072      4659

0.8072100797295699


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-35/checkpoint-1749
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-35/checkpoint-1749/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-35/checkpoint-1749/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-35/checkpoint-1166 (score: 0.4811306893825531).
[32m[I 2022-02-12 15:41:35,402][0m Trial 35 finished with value: 3.2304877050408587 and parameters: {'learning_rate': 6.387782087336463e-05, 'num_train_epochs': 3, 'seed': 27, 'warmup_steps': 195, 'weight_decay': 0.030071139791874396, 'per_device_train_batch_size': 64}. Best is trial 9 with value: 3.2486894835841547.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-b

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5748,0.515685,0.791372,0.789652,0.78845,0.791372
2,0.4156,0.486278,0.802318,0.800883,0.800775,0.802318


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7332    0.7243    0.7287       994
           1     0.8534    0.8775    0.8653      2701
           2     0.6633    0.6193    0.6406       964

    accuracy                         0.7914      4659
   macro avg     0.7500    0.7404    0.7449      4659
weighted avg     0.7885    0.7914    0.7897      4659

0.7896522237283408


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-36/checkpoint-1165
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-36/checkpoint-1165/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-36/checkpoint-1165/pytorch_model.bin
The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7311    0.7958    0.7620       994
           1     0.8731    0.8760    0.8745      2701
           2     0.6701    0.6027    0.6346       964

    accuracy                         0.8023      4659
   macro avg     0.7581    0.7581    0.7571      4659
weighted avg     0.8008    0.8023    0.8009      4659

0.8008830593413219


Saving model checkpoint to /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-36/checkpoint-2330
Configuration saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-36/checkpoint-2330/config.json
Model weights saved in /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-36/checkpoint-2330/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from /content/drive/MyDrive/Dissertation/disbert_hate_twit/hyper/results/run-36/checkpoint-2330 (score: 0.48627781867980957).
[32m[I 2022-02-12 15:44:28,725][0m Trial 36 finished with value: 3.2062945264027176 and parameters: {'learning_rate': 6.60022542733528e-05, 'num_train_epochs': 2, 'seed': 27, 'warmup_steps': 193, 'weight_decay': 0.03362492622452753, 'per_device_train_batch_size': 32}. Best is trial 9 with value: 3.2486894835841547.[0m
Trial:
loading configuration file https://huggingface.co/distilbert-ba

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5514,0.54026,0.770981,0.774159,0.781431,0.770981


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.6566    0.7847    0.7149       994
           1     0.8835    0.8141    0.8474      2701
           2     0.6242    0.6359    0.6300       964

    accuracy                         0.7710      4659
   macro avg     0.7214    0.7449    0.7308      4659
weighted avg     0.7814    0.7710    0.7742      4659

0.7741586100152303


[32m[I 2022-02-12 15:46:17,430][0m Trial 37 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.708,0.562966,0.775274,0.775353,0.775439,0.775274


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7136    0.7093    0.7114       994
           1     0.8526    0.8523    0.8524      2701
           2     0.6231    0.6276    0.6253       964

    accuracy                         0.7753      4659
   macro avg     0.7297    0.7297    0.7297      4659
weighted avg     0.7754    0.7753    0.7754      4659

0.7753534652816131


[32m[I 2022-02-12 15:47:32,568][0m Trial 38 pruned. [0m
Trial:
loading configuration file https://huggingface.co/distilbert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/23454919702d26495337f3da04d1655c7ee010d5ec9d77bdb9e399e00302c0a1.91b885ab15d631bf9cee9dc9d25ece0afd932f2f5130eba28f2055b2220c0333
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "initializer_range": 0.02,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "tr

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6069,0.571793,0.776132,0.772837,0.771481,0.776132


The following columns in the evaluation set  don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: input_ids_bert, sentence, token_type_ids_bert, __index_level_0__, attention_mask_bert.
***** Running Evaluation *****
  Num examples = 4659
  Batch size = 16


              precision    recall  f1-score   support

           0     0.7067    0.7103    0.7085       994
           1     0.8332    0.8726    0.8524      2701
           2     0.6655    0.5737    0.6162       964

    accuracy                         0.7761      4659
   macro avg     0.7351    0.7189    0.7257      4659
weighted avg     0.7715    0.7761    0.7728      4659

0.7728368609924559


[32m[I 2022-02-12 15:50:17,389][0m Trial 39 pruned. [0m


In [None]:
# Mark when the hyperparameter search in the previous cell finished.
# `timestamp` is a helper defined earlier in the notebook (not visible here);
# its output is a date plus a time with microsecond precision.
timestamp()

12-Feb-2022 (15:50:17.397670)


In [None]:
# Show the winning run of the hyperparameter search: its run id, objective
# value, and the hyperparameters that produced it (a `BestRun` record).
# `best_trial` is assigned in an earlier cell (not visible here).
# NOTE(review): the objective looks like accuracy + F1 + precision + recall
# from the final evaluation (the finished-trial logs add up to the reported
# values) -- confirm against the search's compute_objective before quoting it.
best_trial

BestRun(run_id='9', objective=3.2486894835841547, hyperparameters={'learning_rate': 6.579941859647633e-05, 'num_train_epochs': 3, 'seed': 22, 'warmup_steps': 464, 'weight_decay': 0.2894897525763174, 'per_device_train_batch_size': 16})