In [1]:
import shutil
import pandas as pd
import numpy as np
import os
import transformers
from transformers import  AutoModelForSequenceClassification, DataCollatorWithPadding
from transformers import TrainingArguments, Trainer
from datasets import load_dataset
from sklearn.model_selection import train_test_split 

# the name of the pre-trained model we want to use
MODEL_NAME = "bert-base-uncased" 

  from .autonotebook import tqdm as notebook_tqdm


## Data Loading and Preprocessing

In [2]:
data = pd.read_parquet("../labeled_post.parquet")
data.shape

(80571, 15)

### Creating the label

In [3]:
flairs = {
    'Not the A-hole': "NTA",
    'Asshole': "YTA",
#     'No A-holes here': "NAH",
#     'Everyone Sucks': "ESH",
#     'Not enough info': "INFO",
    # 'UPDATE': "?",
    # 'TL;DR': "?",
    'not the a-hole': "NTA",
    # 'POO Mode Activated 💩',
    'asshole': "YTA",
    # '': "?",
    # 'META': "?",
#     'Shitpost': "?",
    'not the asshole': "NTA",
#     'no a--holes here': "NAH",
#     'everyone sucks': "ESH",
#     'too close to call': "?",
#     'not enough info': "INFO",
}

# filter data and label verdicts
labeled_data = data.loc[data.link_flair_text.isin(flairs)].copy()
labeled_data["verdict"] = labeled_data.link_flair_text.map(flairs)
labeled_data["verdict"] = (labeled_data["verdict"] == "YTA").astype(int)
labeled_data["verdict"].value_counts()

0    57633
1    14135
Name: verdict, dtype: int64

In [4]:
negative_data = labeled_data[labeled_data["verdict"] == 0]
positive_data = labeled_data[labeled_data["verdict"] == 1]
negative_data = negative_data.sample(len(positive_data), random_state=547)

In [5]:
labeled_data = pd.concat([negative_data, positive_data])

### Creating the textual input

In [6]:
from gensim.parsing import remove_stopwords, strip_numeric, strip_punctuation, strip_multiple_whitespaces

labeled_data["title_text"] = labeled_data["title"] + " --- " + labeled_data["selftext"]
labeled_data["title_text"] = labeled_data["title_text"].str.lower()
labeled_data["title_text"] = labeled_data["title_text"].apply(strip_multiple_whitespaces)
labeled_data = labeled_data.replace(to_replace=[''], value=np.nan).dropna(subset=["title_text", "verdict"])

In [7]:
final_data = labeled_data[["title_text", "verdict"]]
final_data.head()

Unnamed: 0,title_text,verdict
15509,aita for not stopping my little brother from d...,0
54638,aita for setting my little brother into a fit ...,0
21845,wibta if i called out my landlord? --- i (29f)...,0
13497,aita for not feeding my cousin’s dog? --- my f...,0
26178,aita for telling my step dad i no longer want ...,0


### Data splitting

In [8]:
train, val = train_test_split(final_data, test_size=0.2, random_state=547)
train.to_csv("./data/train.csv", index=False)
val.to_csv("./data/val.csv", index=False)

In [9]:
dataset = load_dataset("csv", data_files={"train": "./data/train.csv", "val": "./data/val.csv"})

Using custom data configuration default-db78e356e0e09e10


Downloading and preparing dataset csv/default to C:\Users\Wenhao\.cache\huggingface\datasets\csv\default-db78e356e0e09e10\0.0.0\652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a...


Downloading data files: 100%|██████████| 2/2 [00:00<?, ?it/s]
Extracting data files: 100%|██████████| 2/2 [00:00<00:00, 443.65it/s]
                                

Dataset csv downloaded and prepared to C:\Users\Wenhao\.cache\huggingface\datasets\csv\default-db78e356e0e09e10\0.0.0\652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a. Subsequent calls will reuse this data.


100%|██████████| 2/2 [00:00<00:00, 363.44it/s]


## Tokenization

In [10]:
from transformers import AutoTokenizer

### Preprocess Setup ###
# tokenization hyperparameters
PADDING = 'max_length' # padding strategy
PADDING_SIDE = 'right' # the side on which the model should have padding applied
TRUNCATION = True # truncate strategy
TRUNCATION_SIDE = 'right' # the side on which the model should have truncation applied
MAX_LEN = 512 # maximum length to use by one of the truncation/padding parameters

# Load the pre-trained tokenmizer ###
TOKENIZER = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    padding_side=PADDING_SIDE,
    truncation_side=TRUNCATION_SIDE,
)

# Define the preprocess function ###
def preprocess_function(examples):
    """
    Preprocess the description field
    ---
    Arguments:
    examples (str, List[str], List[List[str]]: the sequence or batch of sequences to be encoded/tokenized

    Returns:
    tokenized (transformers.BatchEncoding): tokenized descriptions 
    """
    tokenized = TOKENIZER(
        examples["title_text"],
        padding=PADDING,
        truncation=TRUNCATION,
        max_length=MAX_LEN,
    )

    return tokenized

In [11]:
tokenized_dataset = dataset.map(preprocess_function, batched=True)
tokenized_dataset = tokenized_dataset.remove_columns("title_text")

100%|██████████| 23/23 [00:05<00:00,  4.19ba/s]
100%|██████████| 6/6 [00:01<00:00,  4.35ba/s]


In [12]:
tokenized_dataset = tokenized_dataset.rename_column("verdict", "label")

## Evaluation

In [13]:
import numpy as np
from datasets import load_metric
from sklearn.metrics import matthews_corrcoef, cohen_kappa_score, confusion_matrix, balanced_accuracy_score

### Evaluation Metrics ###
metric_acc = load_metric("accuracy")
metric_f1 = load_metric("f1")
metric_precision = load_metric("precision")
metric_recall = load_metric("recall")
metric_auc = load_metric("roc_auc")

def compute_metrics(eval_pred):
    """
    Compute the metrics 
    ---
    Arguments:
    eval_pred (tuple): the predicted logits and truth labels

    Returns:
    metrics (dict{str: float}): contains the computed metrics 
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    prediction_scores = np.max(logits, axis=-1)
    print(logits.shape, labels.shape)
    print(predictions.shape, prediction_scores.shape)

    pred_true = np.count_nonzero(predictions)
    pred_false = predictions.shape[0] - pred_true
    actual_true = np.count_nonzero(labels)
    actual_false = labels.shape[0] - actual_true

    acc = metric_acc.compute(predictions=predictions, references=labels)['accuracy']
    f1 = metric_f1.compute(predictions=predictions, references=labels)['f1']
    precision = metric_precision.compute(predictions=predictions, references=labels)['precision']
    recall = metric_recall.compute(predictions=predictions, references=labels)['recall']
    roc_auc = metric_auc.compute(prediction_scores=predictions, references=labels)['roc_auc']
    matthews_correlation = matthews_corrcoef(y_true=labels, y_pred=predictions)
    cohen_kappa = cohen_kappa_score(y1=labels, y2=predictions)
    balanced_accuracy = balanced_accuracy_score(y_true=labels, y_pred=predictions)

    tn, fp, fn, tp = confusion_matrix(y_true=labels, y_pred=predictions).ravel()
    specificity = tn / (tn + fp)
    sensitivity = tp / (tp + fn)
    informedness = specificity + sensitivity - 1

    metrics = {
        "pred_true": pred_true,
        "pred_false": pred_false,
        "actual_true": actual_true,
        "actual_false": actual_false,
        "accuracy": acc,
        "f1_score": f1,
        "precision": precision,
        "recall": recall,
        "roc_auc": roc_auc,
        "matthews_correlation": matthews_correlation,
        "cohen_kappa": cohen_kappa,
        "true_negative": tn,
        "false_positive": fp,
        "false_negative": fn,
        "true_positive": tp,
        "specificity": specificity,
        "sensitivity": sensitivity,
        "informedness": informedness,
        "balanced_accuracy": balanced_accuracy
    }
    return metrics

## Model fitting

### Hyperparameters

In [15]:
# model hyperparameters
CLASSIFIER_DROPOUT = 0.15 # dropout ratio for the classification head
NUM_CLASSES = 2 # number of classes

# optimization hyperparameters ###
SEED = 547 # random seed for splitting the data into batches
BATCH_SIZE = 16 # batch size for both training and evaluation
GRAD_ACC_STEPS = 4 # number of steps for gradient accumulation
LR = 5e-5 # initial learning rate
WEIGHT_DECAY = 2e-3 # weight decay to apply in the AdamW optimizer
EPOCHS = 10 # total number of training epochs 
LR_SCHEDULER = "cosine" # type of learning rate scheduler
STRATEGY = "steps" # strategy for logging, evaluation, and saving
STEPS = 200 # number of steps for logging, evaluation, and saving
EVAL_METRIC = "f1_score" # metric for selecting the best model

### Fitting the model

In [16]:
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    classifier_dropout=CLASSIFIER_DROPOUT,
    num_labels=NUM_CLASSES,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [17]:
# set up the training arguments
training_args = TrainingArguments(
    output_dir="./model",
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACC_STEPS,
    learning_rate=LR,
    weight_decay=WEIGHT_DECAY, 
    num_train_epochs=EPOCHS,
    lr_scheduler_type=LR_SCHEDULER,
    evaluation_strategy=STRATEGY,
    logging_strategy=STRATEGY, 
    save_strategy=STRATEGY,
    eval_steps=STEPS,
    logging_steps=STEPS,
    save_steps=STEPS,
    seed=SEED,
    load_best_model_at_end=True,
    metric_for_best_model=EVAL_METRIC,
    report_to="none"
)

In [18]:
# set up the trainer 
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['val'],
    tokenizer=TOKENIZER,   
    compute_metrics=compute_metrics,
)

In [19]:
try:
    shutil.rmtree("./model") # remove possible cache
    # shutil.rmtree(best_model_dir)
    # os.remove(best_model_dir_zip)
    os.mkdir("./model")
except:
    None

In [20]:
trainer.train()

***** Running training *****
  Num examples = 22616
  Num Epochs = 10
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 64
  Gradient Accumulation steps = 4
  Total optimization steps = 3530
  6%|▌         | 200/3530 [04:34<1:17:21,  1.39s/it]***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.6919, 'learning_rate': 4.960502152176574e-05, 'epoch': 0.57}


                                                    
  6%|▌         | 200/3530 [05:20<1:17:21,  1.39s/it]Saving model checkpoint to ./model\checkpoint-200
Configuration saved in ./model\checkpoint-200\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 0.6858039498329163, 'eval_pred_true': 1531, 'eval_pred_false': 4123, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.5680933852140078, 'eval_f1_score': 0.4365482233502538, 'eval_precision': 0.6178967994774657, 'eval_recall': 0.33749554049232966, 'eval_roc_auc': 0.566152189748094, 'eval_matthews_correlation': 0.14886414763463218, 'eval_cohen_kappa': 0.1328117374769574, 'eval_true_negative': 2266, 'eval_false_positive': 585, 'eval_false_negative': 1857, 'eval_true_positive': 946, 'eval_specificity': 0.7948088390038583, 'eval_sensitivity': 0.33749554049232966, 'eval_informedness': 0.132304379496188, 'eval_balanced_accuracy': 0.566152189748094, 'eval_runtime': 45.5841, 'eval_samples_per_second': 124.034, 'eval_steps_per_second': 7.766, 'epoch': 0.57}


Model weights saved in ./model\checkpoint-200\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-200\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-200\special_tokens_map.json
 11%|█▏        | 400/3530 [10:06<1:14:32,  1.43s/it] ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.6639, 'learning_rate': 4.843256672692441e-05, 'epoch': 1.13}


                                                    
 11%|█▏        | 400/3530 [10:53<1:14:32,  1.43s/it]Saving model checkpoint to ./model\checkpoint-400
Configuration saved in ./model\checkpoint-400\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 0.6469425559043884, 'eval_pred_true': 1915, 'eval_pred_false': 3739, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.6183233109303148, 'eval_f1_score': 0.5426027977956762, 'eval_precision': 0.6684073107049608, 'eval_recall': 0.45665358544416695, 'eval_roc_auc': 0.6169623591899894, 'eval_matthews_correlation': 0.247129249974427, 'eval_cohen_kappa': 0.23455023631622474, 'eval_true_negative': 2216, 'eval_false_positive': 635, 'eval_false_negative': 1523, 'eval_true_positive': 1280, 'eval_specificity': 0.777271132935812, 'eval_sensitivity': 0.45665358544416695, 'eval_informedness': 0.23392471837997886, 'eval_balanced_accuracy': 0.6169623591899894, 'eval_runtime': 47.2955, 'eval_samples_per_second': 119.546, 'eval_steps_per_second': 7.485, 'epoch': 1.13}


Model weights saved in ./model\checkpoint-400\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-400\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-400\special_tokens_map.json
 17%|█▋        | 600/3530 [15:48<1:11:39,  1.47s/it] ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.6295, 'learning_rate': 4.65196831683292e-05, 'epoch': 1.7}




(5654, 2) (5654,)
(5654,) (5654,)


                                                    
 17%|█▋        | 600/3530 [16:36<1:11:39,  1.47s/it]Saving model checkpoint to ./model\checkpoint-600
Configuration saved in ./model\checkpoint-600\config.json


{'eval_loss': 0.6593964695930481, 'eval_pred_true': 3525, 'eval_pred_false': 2129, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.6370711001061196, 'eval_f1_score': 0.6757269279393172, 'eval_precision': 0.6065248226950355, 'eval_recall': 0.7627541919372102, 'eval_roc_auc': 0.638129112804803, 'eval_matthews_correlation': 0.28507424591388675, 'eval_cohen_kappa': 0.27566049890112987, 'eval_true_negative': 1464, 'eval_false_positive': 1387, 'eval_false_negative': 665, 'eval_true_positive': 2138, 'eval_specificity': 0.5135040336723956, 'eval_sensitivity': 0.7627541919372102, 'eval_informedness': 0.2762582256096058, 'eval_balanced_accuracy': 0.6381291128048029, 'eval_runtime': 48.5757, 'eval_samples_per_second': 116.396, 'eval_steps_per_second': 7.288, 'epoch': 1.7}


Model weights saved in ./model\checkpoint-600\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-600\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-600\special_tokens_map.json
 23%|██▎       | 800/3530 [21:31<1:05:09,  1.43s/it] ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.5883, 'learning_rate': 4.392681467294117e-05, 'epoch': 2.27}


                                                    
 23%|██▎       | 800/3530 [22:19<1:05:09,  1.43s/it]Saving model checkpoint to ./model\checkpoint-800
Configuration saved in ./model\checkpoint-800\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 0.6540129780769348, 'eval_pred_true': 2844, 'eval_pred_false': 2810, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.6439688715953308, 'eval_f1_score': 0.6435275367451744, 'eval_precision': 0.6388888888888888, 'eval_recall': 0.6482340349625402, 'eval_roc_auc': 0.6440047761624346, 'eval_matthews_correlation': 0.2880043807027958, 'eval_cohen_kappa': 0.2879740931845092, 'eval_true_negative': 1824, 'eval_false_positive': 1027, 'eval_false_negative': 986, 'eval_true_positive': 1817, 'eval_specificity': 0.639775517362329, 'eval_sensitivity': 0.6482340349625402, 'eval_informedness': 0.28800955232486913, 'eval_balanced_accuracy': 0.6440047761624346, 'eval_runtime': 47.9635, 'eval_samples_per_second': 117.881, 'eval_steps_per_second': 7.381, 'epoch': 2.27}


Model weights saved in ./model\checkpoint-800\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-800\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-800\special_tokens_map.json
 28%|██▊       | 1000/3530 [27:14<1:00:49,  1.44s/it]***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.5162, 'learning_rate': 4.073589142096592e-05, 'epoch': 2.83}


                                                     
 28%|██▊       | 1000/3530 [28:01<1:00:49,  1.44s/it]Saving model checkpoint to ./model\checkpoint-1000
Configuration saved in ./model\checkpoint-1000\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 0.6895701885223389, 'eval_pred_true': 3096, 'eval_pred_false': 2558, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.6457375309515387, 'eval_f1_score': 0.6604509238854044, 'eval_precision': 0.6291989664082688, 'eval_recall': 0.6949696753478416, 'eval_roc_auc': 0.6461519720127493, 'eval_matthews_correlation': 0.2936257146064592, 'eval_cohen_kappa': 0.29204695697944016, 'eval_true_negative': 1703, 'eval_false_positive': 1148, 'eval_false_negative': 855, 'eval_true_positive': 1948, 'eval_specificity': 0.5973342686776569, 'eval_sensitivity': 0.6949696753478416, 'eval_informedness': 0.29230394402549864, 'eval_balanced_accuracy': 0.6461519720127493, 'eval_runtime': 47.7074, 'eval_samples_per_second': 118.514, 'eval_steps_per_second': 7.42, 'epoch': 2.83}


Model weights saved in ./model\checkpoint-1000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-1000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-1000\special_tokens_map.json
 34%|███▍      | 1200/3530 [32:58<56:47,  1.46s/it]   ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.4, 'learning_rate': 3.704774109322166e-05, 'epoch': 3.4}


                                                   
 34%|███▍      | 1200/3530 [33:47<56:47,  1.46s/it]Saving model checkpoint to ./model\checkpoint-1200
Configuration saved in ./model\checkpoint-1200\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 0.9058732390403748, 'eval_pred_true': 3419, 'eval_pred_false': 2235, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.629642730810046, 'eval_f1_score': 0.6634522661523626, 'eval_precision': 0.6036852880959345, 'eval_recall': 0.7363539065287192, 'eval_roc_auc': 0.6305410360423322, 'eval_matthews_correlation': 0.2669923969146744, 'eval_cohen_kappa': 0.260599962253956, 'eval_true_negative': 1496, 'eval_false_positive': 1355, 'eval_false_negative': 739, 'eval_true_positive': 2064, 'eval_specificity': 0.5247281655559453, 'eval_sensitivity': 0.7363539065287192, 'eval_informedness': 0.2610820720846645, 'eval_balanced_accuracy': 0.6305410360423322, 'eval_runtime': 48.3046, 'eval_samples_per_second': 117.049, 'eval_steps_per_second': 7.328, 'epoch': 3.4}


Model weights saved in ./model\checkpoint-1200\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-1200\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-1200\special_tokens_map.json
 40%|███▉      | 1400/3530 [38:41<50:04,  1.41s/it]   ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.3397, 'learning_rate': 3.297890289002451e-05, 'epoch': 3.96}


                                                   
 40%|███▉      | 1400/3530 [39:29<50:04,  1.41s/it]Saving model checkpoint to ./model\checkpoint-1400
Configuration saved in ./model\checkpoint-1400\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 0.9191586375236511, 'eval_pred_true': 2889, 'eval_pred_false': 2765, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.6423770781747435, 'eval_f1_score': 0.6447645818692902, 'eval_precision': 0.6351678781585324, 'eval_recall': 0.6546557260078487, 'eval_roc_auc': 0.6424804410467163, 'eval_matthews_correlation': 0.2850191662088481, 'eval_cohen_kappa': 0.28488730163833564, 'eval_true_negative': 1797, 'eval_false_positive': 1054, 'eval_false_negative': 968, 'eval_true_positive': 1835, 'eval_specificity': 0.630305156085584, 'eval_sensitivity': 0.6546557260078487, 'eval_informedness': 0.28496088209343284, 'eval_balanced_accuracy': 0.6424804410467164, 'eval_runtime': 48.3583, 'eval_samples_per_second': 116.919, 'eval_steps_per_second': 7.32, 'epoch': 3.96}


Model weights saved in ./model\checkpoint-1400\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-1400\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-1400\special_tokens_map.json
 45%|████▌     | 1600/3530 [44:35<49:44,  1.55s/it]  ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.1987, 'learning_rate': 2.8657945093108886e-05, 'epoch': 4.53}


                                                   
 45%|████▌     | 1600/3530 [45:24<49:44,  1.55s/it]Saving model checkpoint to ./model\checkpoint-1600
Configuration saved in ./model\checkpoint-1600\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 1.236769437789917, 'eval_pred_true': 2949, 'eval_pred_false': 2705, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.631411390166254, 'eval_f1_score': 0.6376912378303199, 'eval_precision': 0.6219057307561885, 'eval_recall': 0.6542989653942205, 'eval_roc_auc': 0.6316040600383941, 'eval_matthews_correlation': 0.2634440655042421, 'eval_cohen_kappa': 0.2630927607585727, 'eval_true_negative': 1736, 'eval_false_positive': 1115, 'eval_false_negative': 969, 'eval_true_positive': 1834, 'eval_specificity': 0.6089091546825676, 'eval_sensitivity': 0.6542989653942205, 'eval_informedness': 0.2632081200767882, 'eval_balanced_accuracy': 0.6316040600383941, 'eval_runtime': 49.9022, 'eval_samples_per_second': 113.302, 'eval_steps_per_second': 7.094, 'epoch': 4.53}


Model weights saved in ./model\checkpoint-1600\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-1600\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-1600\special_tokens_map.json
 51%|█████     | 1800/3530 [50:37<44:00,  1.53s/it]  ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.1748, 'learning_rate': 2.422140252928601e-05, 'epoch': 5.1}


                                                   
 51%|█████     | 1800/3530 [51:28<44:00,  1.53s/it]Saving model checkpoint to ./model\checkpoint-1800
Configuration saved in ./model\checkpoint-1800\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 1.4957462549209595, 'eval_pred_true': 2961, 'eval_pred_false': 2693, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.6268128758401132, 'eval_f1_score': 0.6339347675225537, 'eval_precision': 0.6170212765957447, 'eval_recall': 0.6518016410988227, 'eval_roc_auc': 0.6270232337377664, 'eval_matthews_correlation': 0.2543231754805003, 'eval_cohen_kappa': 0.2539259763268834, 'eval_true_negative': 1717, 'eval_false_positive': 1134, 'eval_false_negative': 976, 'eval_true_positive': 1827, 'eval_specificity': 0.60224482637671, 'eval_sensitivity': 0.6518016410988227, 'eval_informedness': 0.2540464674755327, 'eval_balanced_accuracy': 0.6270232337377664, 'eval_runtime': 50.5751, 'eval_samples_per_second': 111.794, 'eval_steps_per_second': 6.999, 'epoch': 5.1}


Model weights saved in ./model\checkpoint-1800\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-1800\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-1800\special_tokens_map.json
 57%|█████▋    | 2000/3530 [56:25<40:23,  1.58s/it]  ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.0973, 'learning_rate': 1.9809462304994314e-05, 'epoch': 5.66}


                                                   
 57%|█████▋    | 2000/3530 [57:14<40:23,  1.58s/it]Saving model checkpoint to ./model\checkpoint-2000
Configuration saved in ./model\checkpoint-2000\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 1.5220891237258911, 'eval_pred_true': 2821, 'eval_pred_false': 2833, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.6204457021577644, 'eval_f1_score': 0.6184210526315789, 'eval_precision': 0.6164480680609713, 'eval_recall': 0.6204067070995362, 'eval_roc_auc': 0.6204453738935072, 'eval_matthews_correlation': 0.24088260933963873, 'eval_cohen_kappa': 0.24087772631520998, 'eval_true_negative': 1769, 'eval_false_positive': 1082, 'eval_false_negative': 1064, 'eval_true_positive': 1739, 'eval_specificity': 0.6204840406874781, 'eval_sensitivity': 0.6204067070995362, 'eval_informedness': 0.24089074778701436, 'eval_balanced_accuracy': 0.6204453738935072, 'eval_runtime': 49.1839, 'eval_samples_per_second': 114.956, 'eval_steps_per_second': 7.197, 'epoch': 5.66}


Model weights saved in ./model\checkpoint-2000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-2000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-2000\special_tokens_map.json
 62%|██████▏   | 2200/3530 [1:02:08<33:23,  1.51s/it]***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.0751, 'learning_rate': 1.5561534135101884e-05, 'epoch': 6.23}


                                                     
 62%|██████▏   | 2200/3530 [1:02:57<33:23,  1.51s/it]Saving model checkpoint to ./model\checkpoint-2200
Configuration saved in ./model\checkpoint-2200\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 1.9584472179412842, 'eval_pred_true': 2594, 'eval_pred_false': 3060, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.6206225680933852, 'eval_f1_score': 0.6025569760978322, 'eval_precision': 0.6268311488049345, 'eval_recall': 0.5800927577595434, 'eval_roc_auc': 0.6202813841410835, 'eval_matthews_correlation': 0.24137532480661578, 'eval_cohen_kappa': 0.24071385918225097, 'eval_true_negative': 1883, 'eval_false_positive': 968, 'eval_false_negative': 1177, 'eval_true_positive': 1626, 'eval_specificity': 0.6604700105226237, 'eval_sensitivity': 0.5800927577595434, 'eval_informedness': 0.24056276828216694, 'eval_balanced_accuracy': 0.6202813841410835, 'eval_runtime': 48.4612, 'eval_samples_per_second': 116.671, 'eval_steps_per_second': 7.305, 'epoch': 6.23}


Model weights saved in ./model\checkpoint-2200\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-2200\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-2200\special_tokens_map.json
 68%|██████▊   | 2400/3530 [1:07:49<27:27,  1.46s/it]  ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.0594, 'learning_rate': 1.1611845235944144e-05, 'epoch': 6.8}


                                                     
 68%|██████▊   | 2400/3530 [1:08:38<27:27,  1.46s/it]Saving model checkpoint to ./model\checkpoint-2400
Configuration saved in ./model\checkpoint-2400\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 1.999822735786438, 'eval_pred_true': 2621, 'eval_pred_false': 3033, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.6222143615139724, 'eval_f1_score': 0.6061946902654867, 'eval_precision': 0.6272415108737123, 'eval_recall': 0.5865144488048519, 'eval_roc_auc': 0.6219138361176136, 'eval_matthews_correlation': 0.24446879647095257, 'eval_cohen_kappa': 0.24396101908292056, 'eval_true_negative': 1874, 'eval_false_positive': 977, 'eval_false_negative': 1159, 'eval_true_positive': 1644, 'eval_specificity': 0.6573132234303753, 'eval_sensitivity': 0.5865144488048519, 'eval_informedness': 0.24382767223522728, 'eval_balanced_accuracy': 0.6219138361176136, 'eval_runtime': 48.5263, 'eval_samples_per_second': 116.514, 'eval_steps_per_second': 7.295, 'epoch': 6.8}


Model weights saved in ./model\checkpoint-2400\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-2400\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-2400\special_tokens_map.json
 74%|███████▎  | 2600/3530 [1:13:34<22:30,  1.45s/it]  ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.0425, 'learning_rate': 8.085198976392127e-06, 'epoch': 7.36}


                                                     
 74%|███████▎  | 2600/3530 [1:14:23<22:30,  1.45s/it]Saving model checkpoint to ./model\checkpoint-2600
Configuration saved in ./model\checkpoint-2600\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 2.142937421798706, 'eval_pred_true': 2973, 'eval_pred_false': 2681, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.622568093385214, 'eval_f1_score': 0.6305401662049861, 'eval_precision': 0.6125126135216953, 'eval_recall': 0.6496610774170531, 'eval_roc_auc': 0.6227961648046333, 'eval_matthews_correlation': 0.2459116448092844, 'eval_cohen_kappa': 0.24546700597703708, 'eval_true_negative': 1699, 'eval_false_positive': 1152, 'eval_false_negative': 982, 'eval_true_positive': 1821, 'eval_specificity': 0.5959312521922132, 'eval_sensitivity': 0.6496610774170531, 'eval_informedness': 0.24559232960926636, 'eval_balanced_accuracy': 0.6227961648046332, 'eval_runtime': 48.9888, 'eval_samples_per_second': 115.414, 'eval_steps_per_second': 7.226, 'epoch': 7.36}


Model weights saved in ./model\checkpoint-2600\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-2600\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-2600\special_tokens_map.json
 79%|███████▉  | 2800/3530 [1:19:18<17:34,  1.45s/it]  ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.0262, 'learning_rate': 5.093031306275309e-06, 'epoch': 7.93}


                                                     
 79%|███████▉  | 2800/3530 [1:20:07<17:34,  1.45s/it]Saving model checkpoint to ./model\checkpoint-2800
Configuration saved in ./model\checkpoint-2800\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 2.319601535797119, 'eval_pred_true': 3078, 'eval_pred_false': 2576, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.623452423063318, 'eval_f1_score': 0.6379867369494983, 'eval_precision': 0.6094866796621182, 'eval_recall': 0.6692829111666072, 'eval_roc_auc': 0.6238382286453871, 'eval_matthews_correlation': 0.24864953273523127, 'eval_cohen_kappa': 0.24747207191640153, 'eval_true_negative': 1649, 'eval_false_positive': 1202, 'eval_false_negative': 927, 'eval_true_positive': 1876, 'eval_specificity': 0.5783935461241669, 'eval_sensitivity': 0.6692829111666072, 'eval_informedness': 0.24767645729077414, 'eval_balanced_accuracy': 0.6238382286453871, 'eval_runtime': 48.5156, 'eval_samples_per_second': 116.54, 'eval_steps_per_second': 7.297, 'epoch': 7.93}


Model weights saved in ./model\checkpoint-2800\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-2800\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-2800\special_tokens_map.json
 85%|████████▍ | 3000/3530 [1:25:03<13:14,  1.50s/it]  ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.021, 'learning_rate': 2.7298895722308565e-06, 'epoch': 8.5}


                                                     
 85%|████████▍ | 3000/3530 [1:25:51<13:14,  1.50s/it]Saving model checkpoint to ./model\checkpoint-3000
Configuration saved in ./model\checkpoint-3000\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 2.3879358768463135, 'eval_pred_true': 3034, 'eval_pred_false': 2620, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.6248673505482844, 'eval_f1_score': 0.6366284050025698, 'eval_precision': 0.6123928806855636, 'eval_recall': 0.6628612201212986, 'eval_roc_auc': 0.6251871867004247, 'eval_matthews_correlation': 0.2510392328385998, 'eval_cohen_kappa': 0.2502007966339944, 'eval_true_negative': 1675, 'eval_false_positive': 1176, 'eval_false_negative': 945, 'eval_true_positive': 1858, 'eval_specificity': 0.587513153279551, 'eval_sensitivity': 0.6628612201212986, 'eval_informedness': 0.25037437340084967, 'eval_balanced_accuracy': 0.6251871867004248, 'eval_runtime': 48.5938, 'eval_samples_per_second': 116.352, 'eval_steps_per_second': 7.285, 'epoch': 8.5}


Model weights saved in ./model\checkpoint-3000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-3000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-3000\special_tokens_map.json
 91%|█████████ | 3200/3530 [1:30:50<08:15,  1.50s/it]  ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.0139, 'learning_rate': 1.0704449843359499e-06, 'epoch': 9.07}


                                                     
 91%|█████████ | 3200/3530 [1:31:39<08:15,  1.50s/it]Saving model checkpoint to ./model\checkpoint-3200
Configuration saved in ./model\checkpoint-3200\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 2.457062244415283, 'eval_pred_true': 2962, 'eval_pred_false': 2692, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.6248673505482844, 'eval_f1_score': 0.6320901994796184, 'eval_precision': 0.6151249155975692, 'eval_recall': 0.6500178380306814, 'eval_roc_auc': 0.6250790698396129, 'eval_matthews_correlation': 0.250434836722643, 'eval_cohen_kappa': 0.250038742145413, 'eval_true_negative': 1711, 'eval_false_positive': 1140, 'eval_false_negative': 981, 'eval_true_positive': 1822, 'eval_specificity': 0.6001403016485444, 'eval_sensitivity': 0.6500178380306814, 'eval_informedness': 0.25015813967922584, 'eval_balanced_accuracy': 0.6250790698396129, 'eval_runtime': 49.1522, 'eval_samples_per_second': 115.03, 'eval_steps_per_second': 7.202, 'epoch': 9.07}


Model weights saved in ./model\checkpoint-3200\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-3200\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-3200\special_tokens_map.json
 96%|█████████▋| 3400/3530 [1:36:41<03:13,  1.49s/it]  ***** Running Evaluation *****
  Num examples = 5654
  Batch size = 16


{'loss': 0.0122, 'learning_rate': 1.6713313443384726e-07, 'epoch': 9.63}


                                                     
 96%|█████████▋| 3400/3530 [1:37:30<03:13,  1.49s/it]Saving model checkpoint to ./model\checkpoint-3400
Configuration saved in ./model\checkpoint-3400\config.json


(5654, 2) (5654,)
(5654,) (5654,)
{'eval_loss': 2.4857797622680664, 'eval_pred_true': 2988, 'eval_pred_false': 2666, 'eval_actual_true': 2803, 'eval_actual_false': 2851, 'eval_accuracy': 0.623452423063318, 'eval_f1_score': 0.632360559488862, 'eval_precision': 0.6127844712182061, 'eval_recall': 0.6532286835533357, 'eval_roc_auc': 0.6237030825693721, 'eval_matthews_correlation': 0.24779943172486887, 'eval_cohen_kappa': 0.24726878247098194, 'eval_true_negative': 1694, 'eval_false_positive': 1157, 'eval_false_negative': 972, 'eval_true_positive': 1831, 'eval_specificity': 0.5941774815854086, 'eval_sensitivity': 0.6532286835533357, 'eval_informedness': 0.24740616513874425, 'eval_balanced_accuracy': 0.6237030825693721, 'eval_runtime': 49.3725, 'eval_samples_per_second': 114.517, 'eval_steps_per_second': 7.17, 'epoch': 9.63}


Model weights saved in ./model\checkpoint-3400\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-3400\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-3400\special_tokens_map.json
100%|██████████| 3530/3530 [1:40:49<00:00,  1.49s/it]

Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model\checkpoint-600 (score: 0.6757269279393172).
100%|██████████| 3530/3530 [1:40:49<00:00,  1.71s/it]

{'train_runtime': 6049.7342, 'train_samples_per_second': 37.383, 'train_steps_per_second': 0.583, 'train_loss': 0.2583849422992458, 'epoch': 10.0}





TrainOutput(global_step=3530, training_loss=0.2583849422992458, metrics={'train_runtime': 6049.7342, 'train_samples_per_second': 37.383, 'train_steps_per_second': 0.583, 'train_loss': 0.2583849422992458, 'epoch': 10.0})

In [22]:
model.save_pretrained("./model/first_model")

Configuration saved in ./model/first_model\config.json
Model weights saved in ./model/first_model\pytorch_model.bin
