In [39]:
import shutil
import pandas as pd
import numpy as np
import os
import transformers
from transformers import  AutoModelForSequenceClassification, DataCollatorWithPadding
from transformers import TrainingArguments, Trainer
from datasets import load_dataset
from sklearn.model_selection import train_test_split 

# the name of the pre-trained model we want to use
MODEL_NAME = "bert-base-uncased" 

## Data Loading and Preprocessing

In [40]:
data = pd.read_parquet("../labeled_post_enriched_bert.parquet")
data = data.drop(["index", "link_flair_text_y", "Unnamed: 0"], axis=1)
data = data.rename({"link_flair_text_x": "link_flair_text"}, axis=1)

In [41]:
data.columns

Index(['link_flair_text', 'num_comments', 'over_18', 'score', 'url',
       'selftext', 'title', 'id', 'edited', 'is_self', 'permalink', 'downs',
       'ups', 'created', 'date', 'Topic', 'Count', 'Name', 'Representation',
       'verdict', 'is_asshole', 'demonyms', 'demonyms_words_count',
       'demonyms_unique_count', 'features', 'title_uppercase_count',
       'title_word_count', 'title_profanity_count', 'avg_word_length',
       'stop_word_count', 'numerics_count', 'uppercase_words_count',
       'sentence_count', 'avg_sentence_length', 'profanity_count', 'gender',
       'age', 'llama_gender', 'llama_age', 'gender_F', 'gender_M', 'gender_U',
       'title_anger', 'title_disgust', 'title_fear', 'title_joy',
       'title_others', 'title_sadness', 'title_surprise', 'selftext_anger',
       'selftext_disgust', 'selftext_fear', 'selftext_joy', 'selftext_others',
       'selftext_sadness', 'selftext_surprise'],
      dtype='object')

In [42]:
# data = pd.read_parquet("../labeled_post.parquet")
# data.shape

### Creating the label

In [43]:
data["link_flair_text"].value_counts()

Not the A-hole     55095
Asshole            13142
No A-holes here     3470
Everyone Sucks      2951
Name: link_flair_text, dtype: int64

In [44]:
flairs = {
    "Not the A-hole": "NTA",
    "No A-holes here": "NTA",
    "Asshole": "YTA",
    "Everyone Sucks": "YTA"
}

# filter data and label verdicts
labeled_data = data.loc[data.link_flair_text.isin(flairs)].copy()
labeled_data["verdict"] = labeled_data.link_flair_text.map(flairs)
labeled_data["verdict"] = (labeled_data["verdict"] == "YTA").astype(int)
labeled_data["verdict"].value_counts()

0    58565
1    16093
Name: verdict, dtype: int64

In [45]:
negative_data = labeled_data[labeled_data["verdict"] == 0]
positive_data = labeled_data[labeled_data["verdict"] == 1]
negative_data = negative_data.sample(len(positive_data), random_state=547)

In [46]:
labeled_data = pd.concat([negative_data, positive_data])
labeled_data["verdict"].value_counts()

0    16093
1    16093
Name: verdict, dtype: int64

### Creating the textual input

In [47]:
labeled_data["gender"] = labeled_data["gender"].map({"F": "Female", "M": "Male", "U": "Unknown"})
labeled_data["age"] = labeled_data["age"].fillna(-1).astype(int).astype(str)

In [48]:
sentiment_cols = ['title_anger', 'title_disgust', 'title_fear', 'title_joy',
       'title_others', 'title_sadness', 'title_surprise', 'selftext_anger',
       'selftext_disgust', 'selftext_fear', 'selftext_joy', 'selftext_others',
       'selftext_sadness', 'selftext_surprise']
for col in sentiment_cols:
    labeled_data[col] = labeled_data[col].round(6).astype(str)

In [49]:
extra_features = ["gender", "age", "Name", 'title_anger', 'title_disgust', 'title_fear', 'title_joy',
       'title_others', 'title_sadness', 'title_surprise', 'selftext_anger',
       'selftext_disgust', 'selftext_fear', 'selftext_joy', 'selftext_others',
       'selftext_sadness', 'selftext_surprise']

In [50]:
labeled_data["input"] = "gender: " + labeled_data["gender"] + " / age: " + labeled_data["age"] + " / topic: " + labeled_data["Name"] + \
    " / title anger score: " + labeled_data["title_anger"] +  " / title disgust score: " + labeled_data["title_disgust"] + \
    " / title fear score: " + labeled_data["title_fear"] +  " / title joy score: " + labeled_data["title_joy"] + \
    " / title others score: " + labeled_data["title_others"] +  " / title sadness score: " + labeled_data["title_sadness"] + \
    " / title surprise score: " + labeled_data["title_surprise"] +  \
    " / text anger score: " + labeled_data["selftext_anger"] +  " / text disgust score: " + labeled_data["selftext_disgust"] + \
    " / text fear score: " + labeled_data["selftext_fear"] +  " / text joy score: " + labeled_data["selftext_joy"] + \
    " / text others score: " + labeled_data["selftext_others"] +  " / text sadness score: " + labeled_data["selftext_sadness"] + \
    " / text surprise score: " + labeled_data["selftext_surprise"] + \
    " / title: " + labeled_data["title"] + " / text: " + labeled_data["selftext"]

In [51]:
labeled_data["input"].iloc[1]

'gender: Female / age: 27 / topic: finances mortgage savings rent / title anger score: 0.001632 / title disgust score: 0.003698 / title fear score: 0.001713 / title joy score: 0.012659 / title others score: 0.976187 / title sadness score: 0.00127 / title surprise score: 0.002842 / text anger score: 0.001026 / text disgust score: 0.005979 / text fear score: 0.003277 / text joy score: 0.009596 / text others score: 0.975425 / text sadness score: 0.00285 / text surprise score: 0.001847 / title: AITA for doubting my BF’s plan to start a small business? / text: For context I (27F) am dating my bf (32m) for about 2 years now. We have started to have some larger discussions regarding family planning, finances, family, marriage, etc. you get the gist. \n\nMy BF and I have very different backgrounds, we both work in corporate and have great benefits, I am child free, have absolutely no debt, trying to buy my first house, and am doing well for myself generally speaking. My BF has a son from a pre

In [52]:
from gensim.parsing import remove_stopwords, strip_numeric, strip_punctuation, strip_multiple_whitespaces

# labeled_data["title_text"] = labeled_data["title"] + " --- " + labeled_data["selftext"]
labeled_data["input"] = labeled_data["input"].str.lower()
labeled_data["input"] = labeled_data["input"].apply(strip_multiple_whitespaces)
labeled_data = labeled_data.replace(to_replace=[''], value=np.nan).dropna(subset=["input", "verdict"])

In [53]:
final_data = labeled_data[["input", "verdict"]]
final_data.head()

Unnamed: 0,input,verdict
8157,gender: female / age: 40 / topic: parents upse...,0
59194,gender: female / age: 27 / topic: finances mor...,0
45681,gender: female / age: 41 / topic: parents upse...,0
10748,gender: male / age: 18 / topic: friendship tal...,0
35567,gender: female / age: 21 / topic: parents upse...,0


### Data splitting

In [54]:
train, val = train_test_split(final_data, test_size=0.2, random_state=547)
train.to_csv("./data/train.csv", index=False)
val.to_csv("./data/val.csv", index=False)

In [55]:
dataset = load_dataset("csv", data_files={"train": "./data/train.csv", "val": "./data/val.csv"})

Using custom data configuration default-99fe7deac6e319af


Downloading and preparing dataset csv/default to C:\Users\Wenhao\.cache\huggingface\datasets\csv\default-99fe7deac6e319af\0.0.0\652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a...


Downloading data files: 100%|██████████| 2/2 [00:00<?, ?it/s]
Extracting data files: 100%|██████████| 2/2 [00:00<00:00, 905.80it/s]
                                

Dataset csv downloaded and prepared to C:\Users\Wenhao\.cache\huggingface\datasets\csv\default-99fe7deac6e319af\0.0.0\652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a. Subsequent calls will reuse this data.


100%|██████████| 2/2 [00:00<00:00, 332.83it/s]


## Tokenization

In [56]:
from transformers import AutoTokenizer

### Preprocess Setup ###
# tokenization hyperparameters
PADDING = 'max_length' # padding strategy
PADDING_SIDE = 'right' # the side on which the model should have padding applied
TRUNCATION = True # truncate strategy
TRUNCATION_SIDE = 'right' # the side on which the model should have truncation applied
MAX_LEN = 512 # maximum length to use by one of the truncation/padding parameters

# Load the pre-trained tokenmizer ###
TOKENIZER = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    padding_side=PADDING_SIDE,
    truncation_side=TRUNCATION_SIDE,
)

# Define the preprocess function ###
def preprocess_function(examples):
    """
    Preprocess the description field
    ---
    Arguments:
    examples (str, List[str], List[List[str]]: the sequence or batch of sequences to be encoded/tokenized

    Returns:
    tokenized (transformers.BatchEncoding): tokenized descriptions 
    """
    tokenized = TOKENIZER(
        examples["input"],
        padding=PADDING,
        truncation=TRUNCATION,
        max_length=MAX_LEN,
    )

    return tokenized

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\Wenhao/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.21.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/

In [57]:
tokenized_dataset = dataset.map(preprocess_function, batched=True)
tokenized_dataset = tokenized_dataset.remove_columns("input")

100%|██████████| 26/26 [00:09<00:00,  2.78ba/s]
100%|██████████| 7/7 [00:02<00:00,  2.86ba/s]


In [58]:
tokenized_dataset = tokenized_dataset.rename_column("verdict", "label")

In [59]:
tokenized_dataset['train'][0]

{'label': 1,
 'input_ids': [101,
  5907,
  1024,
  3287,
  1013,
  2287,
  1024,
  1011,
  1015,
  1013,
  8476,
  1024,
  6860,
  2831,
  28939,
  6314,
  1013,
  2516,
  4963,
  3556,
  1024,
  1014,
  1012,
  4002,
  21926,
  2575,
  2475,
  1013,
  2516,
  12721,
  3556,
  1024,
  1014,
  1012,
  4002,
  2575,
  23352,
  2487,
  1013,
  2516,
  3571,
  3556,
  1024,
  1014,
  1012,
  4002,
  21926,
  2581,
  2575,
  1013,
  2516,
  6569,
  3556,
  1024,
  1014,
  1012,
  4002,
  2620,
  21619,
  2549,
  1013,
  2516,
  2500,
  3556,
  1024,
  1014,
  1012,
  5989,
  28311,
  21926,
  1013,
  2516,
  12039,
  3556,
  1024,
  1014,
  1012,
  25604,
  2692,
  2575,
  1013,
  2516,
  4474,
  3556,
  1024,
  1014,
  1012,
  4002,
  22394,
  2575,
  2509,
  1013,
  3793,
  4963,
  3556,
  1024,
  1014,
  1012,
  25604,
  2692,
  22275,
  1013,
  3793,
  12721,
  3556,
  1024,
  1014,
  1012,
  5890,
  18827,
  28154,
  1013,
  3793,
  3571,
  3556,
  1024,
  1014,
  1012,
  4002,
  22025

## Evaluation

In [60]:
import numpy as np
from datasets import load_metric
from sklearn.metrics import matthews_corrcoef, cohen_kappa_score, confusion_matrix, balanced_accuracy_score

### Evaluation Metrics ###
metric_acc = load_metric("accuracy")
metric_f1 = load_metric("f1")
metric_precision = load_metric("precision")
metric_recall = load_metric("recall")
metric_auc = load_metric("roc_auc")

def compute_metrics(eval_pred):
    """
    Compute the metrics 
    ---
    Arguments:
    eval_pred (tuple): the predicted logits and truth labels

    Returns:
    metrics (dict{str: float}): contains the computed metrics 
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    prediction_scores = np.max(logits, axis=-1)
    print(logits.shape, labels.shape)
    print(predictions.shape, prediction_scores.shape)

    pred_true = np.count_nonzero(predictions)
    pred_false = predictions.shape[0] - pred_true
    actual_true = np.count_nonzero(labels)
    actual_false = labels.shape[0] - actual_true

    acc = metric_acc.compute(predictions=predictions, references=labels)['accuracy']
    f1 = metric_f1.compute(predictions=predictions, references=labels)['f1']
    precision = metric_precision.compute(predictions=predictions, references=labels)['precision']
    recall = metric_recall.compute(predictions=predictions, references=labels)['recall']
    roc_auc = metric_auc.compute(prediction_scores=predictions, references=labels)['roc_auc']
    matthews_correlation = matthews_corrcoef(y_true=labels, y_pred=predictions)
    cohen_kappa = cohen_kappa_score(y1=labels, y2=predictions)
    balanced_accuracy = balanced_accuracy_score(y_true=labels, y_pred=predictions)

    tn, fp, fn, tp = confusion_matrix(y_true=labels, y_pred=predictions).ravel()
    specificity = tn / (tn + fp)
    sensitivity = tp / (tp + fn)
    informedness = specificity + sensitivity - 1

    metrics = {
        "pred_true": pred_true,
        "pred_false": pred_false,
        "actual_true": actual_true,
        "actual_false": actual_false,
        "accuracy": acc,
        "f1_score": f1,
        "precision": precision,
        "recall": recall,
        "roc_auc": roc_auc,
        "matthews_correlation": matthews_correlation,
        "cohen_kappa": cohen_kappa,
        "true_negative": tn,
        "false_positive": fp,
        "false_negative": fn,
        "true_positive": tp,
        "specificity": specificity,
        "sensitivity": sensitivity,
        "informedness": informedness,
        "balanced_accuracy": balanced_accuracy
    }
    return metrics

## Model fitting

### Hyperparameters

In [61]:
# model hyperparameters
CLASSIFIER_DROPOUT = 0.15 # dropout ratio for the classification head
NUM_CLASSES = 2 # number of classes

# optimization hyperparameters ###
SEED = 547 # random seed for splitting the data into batches
BATCH_SIZE = 16 # batch size for both training and evaluation
GRAD_ACC_STEPS = 2 # number of steps for gradient accumulation
LR = 5e-5 # initial learning rate
WEIGHT_DECAY = 2e-3 # weight decay to apply in the AdamW optimizer
EPOCHS = 3 # total number of training epochs 
LR_SCHEDULER = "cosine" # type of learning rate scheduler
STRATEGY = "steps" # strategy for logging, evaluation, and saving
STEPS = 100 # number of steps for logging, evaluation, and saving
EVAL_METRIC = "f1_score" # metric for selecting the best model

### Fitting the model

In [62]:
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    classifier_dropout=CLASSIFIER_DROPOUT,
    num_labels=NUM_CLASSES,
)

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\Wenhao/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": 0.15,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.21.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file https://huggingface.co/bert-base-

In [63]:
# set up the training arguments
training_args = TrainingArguments(
    output_dir="./model",
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACC_STEPS,
    learning_rate=LR,
    weight_decay=WEIGHT_DECAY, 
    num_train_epochs=EPOCHS,
    lr_scheduler_type=LR_SCHEDULER,
    evaluation_strategy=STRATEGY,
    logging_strategy=STRATEGY, 
    save_strategy=STRATEGY,
    eval_steps=STEPS,
    logging_steps=STEPS,
    save_steps=STEPS,
    seed=SEED,
    load_best_model_at_end=True,
    metric_for_best_model=EVAL_METRIC,
    report_to="none"
)

PyTorch: setting up devices


In [64]:
# set up the trainer 
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['val'],
    tokenizer=TOKENIZER,   
    compute_metrics=compute_metrics,
)

In [65]:
try:
    shutil.rmtree("./model") # remove possible cache
    # shutil.rmtree(best_model_dir)
    # os.remove(best_model_dir_zip)
    os.mkdir("./model")
except:
    None

In [66]:
trainer.train()

***** Running training *****
  Num examples = 25748
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 2
  Total optimization steps = 2415


  1%|          | 28/2415 [04:00<8:38:39, 13.04s/it]

KeyboardInterrupt: 

In [22]:
model.save_pretrained("./model/first_model")

Configuration saved in ./model/first_model\config.json
Model weights saved in ./model/first_model\pytorch_model.bin


## Hyperparameter tuning

Here we tune three hyperparameters -- learning rate, weight_decay, dropout.

In [22]:
import itertools as it

# model hyperparameters
CLASSIFIER_DROPOUT = 0.15 # dropout ratio for the classification head
NUM_CLASSES = 2 # number of classes

# optimization hyperparameters ###
SEED = 547 # random seed for splitting the data into batches
BATCH_SIZE = 32 # batch size for both training and evaluation
GRAD_ACC_STEPS = 1 # number of steps for gradient accumulation
LR = 5e-5 # initial learning rate
WEIGHT_DECAY = 2e-3 # weight decay to apply in the AdamW optimizer
EPOCHS = 3 # total number of training epochs 
LR_SCHEDULER = "cosine" # type of learning rate scheduler
STRATEGY = "steps" # strategy for logging, evaluation, and saving
STEPS = 100 # number of steps for logging, evaluation, and saving
EVAL_METRIC = "f1_score" # metric for selecting the best model

HYPERPARAMETER_GRID = {
    "learning_rate": [5e-5, 3e-5, 2e-5], # recommended by the BERT authors
    "weight_decay": [1/8 * 1e-3, 1/4 * 1e-3, 1/2 * 1e-3], # recommended by the AdamW authors
    "dropout": [1e-1, 3e-1, 5e-1]
}

ALL_COMBINATIONS = it.product(*(HYPERPARAMETER_GRID[key] for key in HYPERPARAMETER_GRID))
ALL_COMBINATIONS = list(ALL_COMBINATIONS)

In [23]:
val_eval = {}

for combination in ALL_COMBINATIONS:
    try:
        shutil.rmtree("./model") # remove possible cache
        # shutil.rmtree(best_model_dir)
        # os.remove(best_model_dir_zip)
        os.mkdir("./model")
    except:
        None

    LR = combination[0]
    WEIGHT_DECAY = combination[1]
    CLASSIFIER_DROPOUT = combination[2]

    print(f"=== Learning rate = {LR}, Weight decay = {WEIGHT_DECAY}, Classifier dropout = {CLASSIFIER_DROPOUT} ===")

    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        classifier_dropout=CLASSIFIER_DROPOUT,
        num_labels=NUM_CLASSES,
    )

    # set up the training arguments
    training_args = TrainingArguments(
        output_dir="./model",
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=GRAD_ACC_STEPS,
        learning_rate=LR,
        weight_decay=WEIGHT_DECAY, 
        num_train_epochs=EPOCHS,
        lr_scheduler_type=LR_SCHEDULER,
        evaluation_strategy=STRATEGY,
        logging_strategy=STRATEGY, 
        save_strategy=STRATEGY,
        eval_steps=STEPS,
        logging_steps=STEPS,
        save_steps=STEPS,
        seed=SEED,
        load_best_model_at_end=True,
        metric_for_best_model=EVAL_METRIC,
        report_to="none"
    )

    # set up the trainer 
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset['train'],
        eval_dataset=tokenized_dataset['val'],
        tokenizer=TOKENIZER,   
        compute_metrics=compute_metrics,
    )

    trainer.train()

    val_predictions = trainer.predict(tokenized_dataset["val"])
    val_eval[combination] = val_predictions.metrics

    val_eval_df = pd.DataFrame.from_dict(val_eval).transpose()
    val_eval_df.to_csv(f"metrics/val_evaluation.csv")

=== Learning rate = 5e-05, Weight decay = 0.000125, Classifier dropout = 0.1 ===


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

{'loss': 0.698, 'learning_rate': 4.97887664223157e-05, 'epoch': 0.12}


  _warn_prf(average, modifier, msg_start, len(result))
                                                    
  4%|▍         | 100/2415 [08:36<3:20:45,  5.20s/it]Saving model checkpoint to ./model\checkpoint-100
Configuration saved in ./model\checkpoint-100\config.json


(6438, 2) (6438,)
(6438,) (6438,)
{'eval_loss': 0.6930596232414246, 'eval_pred_true': 0, 'eval_pred_false': 6438, 'eval_actual_true': 3177, 'eval_actual_false': 3261, 'eval_accuracy': 0.5065237651444549, 'eval_f1_score': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_roc_auc': 0.5, 'eval_matthews_correlation': 0.0, 'eval_cohen_kappa': 0.0, 'eval_true_negative': 3261, 'eval_false_positive': 0, 'eval_false_negative': 3177, 'eval_true_positive': 0, 'eval_specificity': 1.0, 'eval_sensitivity': 0.0, 'eval_informedness': 0.0, 'eval_balanced_accuracy': 0.5, 'eval_runtime': 177.0984, 'eval_samples_per_second': 36.353, 'eval_steps_per_second': 1.141, 'epoch': 0.12}


Model weights saved in ./model\checkpoint-100\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-100\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-100\special_tokens_map.json
  8%|▊         | 200/2415 [59:20<20:04:56, 32.64s/it]***** Running Evaluation *****
  Num examples = 6438
  Batch size = 32


{'loss': 0.698, 'learning_rate': 4.91586352592101e-05, 'epoch': 0.25}


  _warn_prf(average, modifier, msg_start, len(result))
                                                     
  8%|▊         | 200/2415 [1:59:00<20:04:56, 32.64s/it]Saving model checkpoint to ./model\checkpoint-200
Configuration saved in ./model\checkpoint-200\config.json


(6438, 2) (6438,)
(6438,) (6438,)
{'eval_loss': 0.6935803890228271, 'eval_pred_true': 0, 'eval_pred_false': 6438, 'eval_actual_true': 3177, 'eval_actual_false': 3261, 'eval_accuracy': 0.5065237651444549, 'eval_f1_score': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_roc_auc': 0.5, 'eval_matthews_correlation': 0.0, 'eval_cohen_kappa': 0.0, 'eval_true_negative': 3261, 'eval_false_positive': 0, 'eval_false_negative': 3177, 'eval_true_positive': 0, 'eval_specificity': 1.0, 'eval_sensitivity': 0.0, 'eval_informedness': 0.0, 'eval_balanced_accuracy': 0.5, 'eval_runtime': 3579.6556, 'eval_samples_per_second': 1.798, 'eval_steps_per_second': 0.056, 'epoch': 0.25}


Model weights saved in ./model\checkpoint-200\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-200\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-200\special_tokens_map.json
 12%|█▏        | 300/2415 [3:00:12<25:16:21, 43.02s/it]   ***** Running Evaluation *****
  Num examples = 6438
  Batch size = 32


{'loss': 0.6952, 'learning_rate': 4.8120254899482665e-05, 'epoch': 0.37}


  _warn_prf(average, modifier, msg_start, len(result))
                                                       
 12%|█▏        | 300/2415 [3:58:23<25:16:21, 43.02s/it]Saving model checkpoint to ./model\checkpoint-300
Configuration saved in ./model\checkpoint-300\config.json


(6438, 2) (6438,)
(6438,) (6438,)
{'eval_loss': 0.693131685256958, 'eval_pred_true': 0, 'eval_pred_false': 6438, 'eval_actual_true': 3177, 'eval_actual_false': 3261, 'eval_accuracy': 0.5065237651444549, 'eval_f1_score': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_roc_auc': 0.5, 'eval_matthews_correlation': 0.0, 'eval_cohen_kappa': 0.0, 'eval_true_negative': 3261, 'eval_false_positive': 0, 'eval_false_negative': 3177, 'eval_true_positive': 0, 'eval_specificity': 1.0, 'eval_sensitivity': 0.0, 'eval_informedness': 0.0, 'eval_balanced_accuracy': 0.5, 'eval_runtime': 3491.1992, 'eval_samples_per_second': 1.844, 'eval_steps_per_second': 0.058, 'epoch': 0.37}


Model weights saved in ./model\checkpoint-300\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-300\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-300\special_tokens_map.json
 17%|█▋        | 400/2415 [4:55:47<24:09:37, 43.16s/it]   ***** Running Evaluation *****
  Num examples = 6438
  Batch size = 32


{'loss': 0.6954, 'learning_rate': 4.669117260700397e-05, 'epoch': 0.5}


  _warn_prf(average, modifier, msg_start, len(result))
                                                       
 17%|█▋        | 400/2415 [5:54:03<24:09:37, 43.16s/it]Saving model checkpoint to ./model\checkpoint-400
Configuration saved in ./model\checkpoint-400\config.json


(6438, 2) (6438,)
(6438,) (6438,)
{'eval_loss': 0.694370687007904, 'eval_pred_true': 0, 'eval_pred_false': 6438, 'eval_actual_true': 3177, 'eval_actual_false': 3261, 'eval_accuracy': 0.5065237651444549, 'eval_f1_score': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_roc_auc': 0.5, 'eval_matthews_correlation': 0.0, 'eval_cohen_kappa': 0.0, 'eval_true_negative': 3261, 'eval_false_positive': 0, 'eval_false_negative': 3177, 'eval_true_positive': 0, 'eval_specificity': 1.0, 'eval_sensitivity': 0.0, 'eval_informedness': 0.0, 'eval_balanced_accuracy': 0.5, 'eval_runtime': 3495.5963, 'eval_samples_per_second': 1.842, 'eval_steps_per_second': 0.058, 'epoch': 0.5}


Model weights saved in ./model\checkpoint-400\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-400\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-400\special_tokens_map.json
 21%|██        | 500/2415 [6:54:33<18:26:13, 34.66s/it]   ***** Running Evaluation *****
  Num examples = 6438
  Batch size = 32


{'loss': 0.696, 'learning_rate': 4.489553799500966e-05, 'epoch': 0.62}


  _warn_prf(average, modifier, msg_start, len(result))
                                                       
 21%|██        | 500/2415 [7:45:55<18:26:13, 34.66s/it]Saving model checkpoint to ./model\checkpoint-500
Configuration saved in ./model\checkpoint-500\config.json


(6438, 2) (6438,)
(6438,) (6438,)
{'eval_loss': 0.6930409669876099, 'eval_pred_true': 0, 'eval_pred_false': 6438, 'eval_actual_true': 3177, 'eval_actual_false': 3261, 'eval_accuracy': 0.5065237651444549, 'eval_f1_score': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_roc_auc': 0.5, 'eval_matthews_correlation': 0.0, 'eval_cohen_kappa': 0.0, 'eval_true_negative': 3261, 'eval_false_positive': 0, 'eval_false_negative': 3177, 'eval_true_positive': 0, 'eval_specificity': 1.0, 'eval_sensitivity': 0.0, 'eval_informedness': 0.0, 'eval_balanced_accuracy': 0.5, 'eval_runtime': 3081.879, 'eval_samples_per_second': 2.089, 'eval_steps_per_second': 0.066, 'epoch': 0.62}


Model weights saved in ./model\checkpoint-500\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-500\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-500\special_tokens_map.json
 25%|██▍       | 600/2415 [8:47:57<19:07:47, 37.94s/it]  ***** Running Evaluation *****
  Num examples = 6438
  Batch size = 32


{'loss': 0.6966, 'learning_rate': 4.2763694929364166e-05, 'epoch': 0.75}


  _warn_prf(average, modifier, msg_start, len(result))
                                                       
 25%|██▍       | 600/2415 [9:43:56<19:07:47, 37.94s/it]Saving model checkpoint to ./model\checkpoint-600
Configuration saved in ./model\checkpoint-600\config.json


(6438, 2) (6438,)
(6438,) (6438,)
{'eval_loss': 0.6978518962860107, 'eval_pred_true': 0, 'eval_pred_false': 6438, 'eval_actual_true': 3177, 'eval_actual_false': 3261, 'eval_accuracy': 0.5065237651444549, 'eval_f1_score': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_roc_auc': 0.5, 'eval_matthews_correlation': 0.0, 'eval_cohen_kappa': 0.0, 'eval_true_negative': 3261, 'eval_false_positive': 0, 'eval_false_negative': 3177, 'eval_true_positive': 0, 'eval_specificity': 1.0, 'eval_sensitivity': 0.0, 'eval_informedness': 0.0, 'eval_balanced_accuracy': 0.5, 'eval_runtime': 3358.8279, 'eval_samples_per_second': 1.917, 'eval_steps_per_second': 0.06, 'epoch': 0.75}


Model weights saved in ./model\checkpoint-600\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-600\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-600\special_tokens_map.json
 29%|██▉       | 700/2415 [10:43:26<21:21:55, 44.85s/it]  ***** Running Evaluation *****
  Num examples = 6438
  Batch size = 32


{'loss': 0.6968, 'learning_rate': 4.033166875709291e-05, 'epoch': 0.87}


                                                        
 29%|██▉       | 700/2415 [11:37:03<21:21:55, 44.85s/it]Saving model checkpoint to ./model\checkpoint-700
Configuration saved in ./model\checkpoint-700\config.json


(6438, 2) (6438,)
(6438,) (6438,)
{'eval_loss': 0.6933590173721313, 'eval_pred_true': 6438, 'eval_pred_false': 0, 'eval_actual_true': 3177, 'eval_actual_false': 3261, 'eval_accuracy': 0.4934762348555452, 'eval_f1_score': 0.6608424336973479, 'eval_precision': 0.4934762348555452, 'eval_recall': 1.0, 'eval_roc_auc': 0.5, 'eval_matthews_correlation': 0.0, 'eval_cohen_kappa': 0.0, 'eval_true_negative': 0, 'eval_false_positive': 3261, 'eval_false_negative': 0, 'eval_true_positive': 3177, 'eval_specificity': 0.0, 'eval_sensitivity': 1.0, 'eval_informedness': 0.0, 'eval_balanced_accuracy': 0.5, 'eval_runtime': 3217.1583, 'eval_samples_per_second': 2.001, 'eval_steps_per_second': 0.063, 'epoch': 0.87}


Model weights saved in ./model\checkpoint-700\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-700\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-700\special_tokens_map.json
 33%|███▎      | 800/2415 [12:10:41<7:24:44, 16.52s/it]    ***** Running Evaluation *****
  Num examples = 6438
  Batch size = 32


{'loss': 0.6946, 'learning_rate': 3.764055752534714e-05, 'epoch': 0.99}


                                                       
 33%|███▎      | 800/2415 [12:18:17<7:24:44, 16.52s/it]Saving model checkpoint to ./model\checkpoint-800
Configuration saved in ./model\checkpoint-800\config.json


(6438, 2) (6438,)
(6438,) (6438,)
{'eval_loss': 0.7012410163879395, 'eval_pred_true': 6438, 'eval_pred_false': 0, 'eval_actual_true': 3177, 'eval_actual_false': 3261, 'eval_accuracy': 0.4934762348555452, 'eval_f1_score': 0.6608424336973479, 'eval_precision': 0.4934762348555452, 'eval_recall': 1.0, 'eval_roc_auc': 0.5, 'eval_matthews_correlation': 0.0, 'eval_cohen_kappa': 0.0, 'eval_true_negative': 0, 'eval_false_positive': 3261, 'eval_false_negative': 0, 'eval_true_positive': 3177, 'eval_specificity': 0.0, 'eval_sensitivity': 1.0, 'eval_informedness': 0.0, 'eval_balanced_accuracy': 0.5, 'eval_runtime': 455.7198, 'eval_samples_per_second': 14.127, 'eval_steps_per_second': 0.443, 'epoch': 0.99}


Model weights saved in ./model\checkpoint-800\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-800\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-800\special_tokens_map.json
 35%|███▌      | 849/2415 [12:29:59<15:50:49, 36.43s/it] 