In [1]:
%load_ext autoreload
%autoreload 2

In [7]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import torch
import numpy as np
import evaluate
from collections import defaultdict
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction, AutoTokenizer, TrainingArguments, Trainer, \
    DataCollatorForTokenClassification, DataCollatorWithPadding, \
    AutoModelForTokenClassification, AutoModelForSequenceClassification

from data_loader import DataLoader

# Выгрузка набора данных

In [26]:
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
tokenizer_params = dict(truncation=True)

loader = DataLoader("./data", tokenizer, tokenizer_params)
datasets = loader.get_data_for_experiment(domain="all", regime="large")
info_columns = ["text", "intents", "tagger", "tokens"]

In [11]:
args = TrainingArguments("distilbert-finetuned",
                        learning_rate=1e-5,
                        optim="adamw_torch_fused", # немного оптимизированный под ГПУ оптимизер
                        per_device_train_batch_size=16,
                        per_device_eval_batch_size=128, # поскольку на шаге eval не нужно считать loss, граф вычислений не строится и обычно можно использовать батч побольше
                        num_train_epochs=7,
                        weight_decay=0.01, # https://medium.com/unpackai/stay-away-from-overfitting-l2-norm-regularization-weight-decay-and-l1-norm-regularization-795bbc5cf958
                        warmup_steps = 100, # число шагов (батчей) за которые lr увеличивается от 0 до заданного нами
                        lr_scheduler_type = 'linear', # эта штука "постепенно" скручивает learning rate по ходу эпох. linear - дефолтное значение
                        evaluation_strategy="steps", # выбираем стратегию оценки качества модели - каждые n батчей (как альтернатива - по эпохам)
                        eval_steps=200, # раз в сколько батчей оцениваем
                        seed=42, # просто немного для стабильности
                        fp16=True, #  очень вполезная штука, заметно скоряет обучение, хотя работает только на GPU
                        logging_strategy='steps',
                        logging_steps=200, # как часто считаем лос на трейне)
                        save_strategy="no"
)

# Задача тегирования слотов

In [12]:
metric = evaluate.load('seqeval')
label_names = list(loader.tag2index.keys())

def compute_metrics(eval_preds):
    logits, labels = eval_preds

    predictions = np.argmax(logits, axis=-1)

    true_labels = [[label_names[l] for l in label if l!=-100] for label in labels]

    true_predictions = [[label_names[p] for p,l in zip(prediction, label) if l!=-100] 
                        for prediction, label in zip(predictions, labels)]

    all_metrics = metric.compute(predictions=true_predictions, references=true_labels)

    return {"precision": all_metrics['overall_precision'],
            "recall": all_metrics['overall_recall'],
            "f1": all_metrics['overall_f1'],
            "accuracy": all_metrics['overall_accuracy']}

In [33]:
tagger_result = defaultdict(list)

for fold_dataset in datasets.values():
    model = AutoModelForTokenClassification.from_pretrained(
        "distilbert-base-uncased",
        id2label=loader.index2tag,
        label2id=loader.tag2index
    ).to('cuda')
    
    train_dataset = fold_dataset['train'].remove_columns(info_columns + ["classification_labels"]).rename_column("tagging_labels", "labels")
    test_dataset = fold_dataset['test'].remove_columns(info_columns + ["classification_labels"]).rename_column("tagging_labels", "labels")

    trainer = Trainer(model=model,
                    args=args,
                    train_dataset = train_dataset,
                    eval_dataset = test_dataset,
                    data_collator=DataCollatorForTokenClassification(tokenizer=tokenizer),
                    compute_metrics=compute_metrics,
                    tokenizer=tokenizer)

    trainer.train()
    for name, val in trainer.evaluate().items():
        if val != 'epoch':
            tagger_result[name].append(val)

for name, val in tagger_result.items():
    print(name, np.mean(val))

Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
200,1.434,0.542689,0.272727,0.222222,0.244898,0.876749
400,0.3865,0.347166,0.410256,0.42328,0.416667,0.91006
600,0.2515,0.281186,0.468599,0.513228,0.489899,0.925383
800,0.1995,0.25615,0.518519,0.592593,0.553086,0.931712
1000,0.1654,0.243599,0.53211,0.613757,0.570025,0.938041
1200,0.1581,0.240034,0.545024,0.608466,0.575,0.941372


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
200,1.5309,0.41618,0.340278,0.26776,0.299694,0.905299
400,0.3765,0.250426,0.463542,0.486339,0.474667,0.935717
600,0.2606,0.207767,0.54,0.590164,0.563969,0.945437
800,0.1983,0.182872,0.588542,0.617486,0.602667,0.951395
1000,0.1693,0.172753,0.627027,0.63388,0.630435,0.953277
1200,0.1523,0.170666,0.60733,0.63388,0.620321,0.954531


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
200,1.5396,0.476949,0.299465,0.27451,0.286445,0.890484
400,0.373,0.293166,0.460829,0.490196,0.475059,0.922309
600,0.2573,0.221631,0.545455,0.617647,0.57931,0.938846
800,0.2004,0.190245,0.630137,0.676471,0.652482,0.944774
1000,0.1725,0.178316,0.643172,0.715686,0.677494,0.954134
1200,0.1566,0.1703,0.653333,0.720588,0.685315,0.956006


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
200,1.5445,0.461088,0.358025,0.291457,0.32133,0.901222
400,0.3627,0.289304,0.49505,0.502513,0.498753,0.930955
600,0.2477,0.229398,0.61658,0.59799,0.607143,0.9445
800,0.2015,0.199199,0.617647,0.633166,0.62531,0.947803
1000,0.1718,0.179377,0.626214,0.648241,0.637037,0.95441
1200,0.1488,0.17434,0.666667,0.683417,0.674938,0.958044


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
200,1.5274,0.538294,0.300518,0.254386,0.275534,0.875429
400,0.3742,0.326914,0.457031,0.513158,0.483471,0.925382
600,0.2512,0.26378,0.552124,0.627193,0.587269,0.937246
800,0.1923,0.231884,0.580645,0.631579,0.605042,0.944115
1000,0.1721,0.21739,0.608163,0.653509,0.630021,0.94911
1200,0.1516,0.210899,0.627049,0.671053,0.648305,0.952544


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
200,1.5402,0.468921,0.409938,0.340206,0.371831,0.896494
400,0.3587,0.301781,0.43379,0.489691,0.460048,0.923873
600,0.2488,0.24358,0.542453,0.592784,0.566502,0.938898
800,0.1944,0.213891,0.606796,0.64433,0.625,0.949249
1000,0.1701,0.200669,0.592417,0.64433,0.617284,0.95025
1200,0.1445,0.195785,0.626794,0.675258,0.650124,0.951586


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
200,1.534,0.434002,0.328571,0.233503,0.272997,0.900383
400,0.3716,0.279607,0.405941,0.416244,0.411028,0.929438
600,0.2513,0.227465,0.502415,0.527919,0.514851,0.941571
800,0.1978,0.199651,0.54717,0.588832,0.567237,0.948595
1000,0.1678,0.187613,0.541667,0.593909,0.566586,0.95083
1200,0.1475,0.183467,0.553991,0.598985,0.57561,0.952746


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
200,1.5163,0.472772,0.306818,0.253521,0.277635,0.894095
400,0.3758,0.292818,0.466368,0.488263,0.477064,0.928113
600,0.2577,0.230776,0.625,0.633803,0.629371,0.94448
800,0.1988,0.204883,0.680556,0.690141,0.685315,0.95122
1000,0.1663,0.192658,0.728972,0.732394,0.730679,0.955392
1200,0.1527,0.189191,0.740566,0.737089,0.738824,0.956354


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
200,1.5172,0.532347,0.29375,0.22488,0.254743,0.876196
400,0.3747,0.331449,0.442922,0.464115,0.453271,0.916023
600,0.2524,0.268936,0.543103,0.602871,0.571429,0.934856
800,0.1936,0.233684,0.591304,0.650718,0.61959,0.941957
1000,0.1676,0.221599,0.616071,0.660287,0.637413,0.946897
1200,0.1433,0.216085,0.606987,0.665072,0.634703,0.948441


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
200,1.5245,0.466254,0.261538,0.2,0.226667,0.885667
400,0.3749,0.292795,0.359551,0.376471,0.367816,0.926
600,0.2467,0.241171,0.505556,0.535294,0.52,0.939
800,0.1935,0.215619,0.554348,0.6,0.576271,0.948667
1000,0.1624,0.207138,0.550265,0.611765,0.579387,0.951
1200,0.1507,0.202342,0.544503,0.611765,0.576177,0.954


eval_loss 0.19529844373464583
eval_precision 0.6173427144167403
eval_recall 0.6616213122720609
eval_f1 0.6384978663631674
eval_accuracy 0.9526286757642805
eval_runtime 0.19548000000000001
eval_samples_per_second 1578.8169
eval_steps_per_second 15.383700000000001
epoch 7.0


# Задача классификации интентов

In [45]:
# source: https://jesusleal.io/2021/04/21/Longformer-multilabel-classification/
def multi_label_metrics(predictions, labels, threshold=0.15):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    # next, use threshold to turn them into integer predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    # finally, compute metrics
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    roc_auc = roc_auc_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)
    # return as dictionary
    metrics = {'f1': f1_micro_average,
               'roc_auc': roc_auc,
               'accuracy': accuracy}
    return metrics

def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, 
            tuple) else p.predictions
    result = multi_label_metrics(
        predictions=preds, 
        labels=p.label_ids)
    return result

In [47]:
args = TrainingArguments("distilbert-finetuned",
                        learning_rate=1e-5,
                        optim="adamw_torch_fused", # немного оптимизированный под ГПУ оптимизер
                        per_device_train_batch_size=16,
                        per_device_eval_batch_size=128, # поскольку на шаге eval не нужно считать loss, граф вычислений не строится и обычно можно использовать батч побольше
                        num_train_epochs=15,
                        weight_decay=0.01, # https://medium.com/unpackai/stay-away-from-overfitting-l2-norm-regularization-weight-decay-and-l1-norm-regularization-795bbc5cf958
                        warmup_steps = 100, # число шагов (батчей) за которые lr увеличивается от 0 до заданного нами
                        lr_scheduler_type = 'linear', # эта штука "постепенно" скручивает learning rate по ходу эпох. linear - дефолтное значение
                        evaluation_strategy="steps", # выбираем стратегию оценки качества модели - каждые n батчей (как альтернатива - по эпохам)
                        eval_steps=200, # раз в сколько батчей оцениваем
                        seed=42, # просто немного для стабильности
                        fp16=True, #  очень вполезная штука, заметно скоряет обучение, хотя работает только на GPU
                        logging_strategy='steps',
                        logging_steps=200, # как часто считаем лос на трейне)
                        save_strategy="no"
)

In [48]:
classifier_result = defaultdict(list)

for fold_dataset in datasets.values():
    model = AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased", 
        problem_type="multi_label_classification", 
        num_labels=len(loader.index2intent.keys()), 
        id2label=loader.index2intent, 
        label2id=loader.intent2index
    ).to('cuda')
    
    train_dataset = fold_dataset['train'].remove_columns(info_columns + ["tagging_labels"]).rename_column("classification_labels", "labels")
    test_dataset = fold_dataset['test'].remove_columns(info_columns + ["tagging_labels"]).rename_column("classification_labels", "labels")

    trainer = Trainer(model=model,
                    args=args,
                    train_dataset = train_dataset,
                    eval_dataset = test_dataset,
                    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
                    compute_metrics=compute_metrics,
                    tokenizer=tokenizer)

    trainer.train()
    for name, val in trainer.evaluate().items():
        if val != 'epoch':
            classifier_result[name].append(val)

for name, val in classifier_result.items():
    print(name, np.mean(val))

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
200,0.4806,0.24015,0.079361,0.570263,0.0
400,0.1777,0.143087,0.172879,0.569471,0.006579
600,0.1395,0.134577,0.181656,0.567694,0.013158
800,0.1328,0.127528,0.298201,0.623162,0.108553
1000,0.1257,0.118198,0.405728,0.684469,0.141447
1200,0.1169,0.109975,0.446964,0.701589,0.154605
1400,0.1088,0.103476,0.477237,0.722568,0.157895
1600,0.1021,0.098108,0.506709,0.736135,0.184211
1800,0.0986,0.094011,0.542722,0.753528,0.207237
2000,0.0946,0.091234,0.554949,0.763244,0.217105


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
200,0.4793,0.237757,0.080583,0.573563,0.0
400,0.1782,0.14439,0.166275,0.566584,0.009772
600,0.1391,0.135805,0.169872,0.567307,0.013029
800,0.1342,0.130098,0.260943,0.607289,0.110749
1000,0.1268,0.121629,0.376972,0.670937,0.14658
1200,0.1186,0.114199,0.428693,0.682931,0.166124
1400,0.11,0.106921,0.449074,0.710913,0.159609
1600,0.1036,0.101838,0.49098,0.728706,0.179153
1800,0.0993,0.098082,0.510938,0.739537,0.192182
2000,0.0934,0.095168,0.532081,0.749772,0.205212


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
200,0.4797,0.235692,0.077351,0.570433,0.0
400,0.1764,0.140716,0.179687,0.574519,0.035144
600,0.1411,0.132334,0.180251,0.574624,0.035144
800,0.1338,0.126795,0.2656,0.615634,0.102236
1000,0.1278,0.119196,0.369284,0.655977,0.162939
1200,0.1181,0.11099,0.438333,0.693648,0.191693
1400,0.1108,0.104097,0.47452,0.710333,0.198083
1600,0.1043,0.098947,0.505728,0.729446,0.207668
1800,0.0977,0.094545,0.525979,0.7445,0.226837
2000,0.0944,0.091688,0.534776,0.756816,0.226837


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
200,0.4845,0.238182,0.07502,0.564939,0.0
400,0.1768,0.140295,0.18328,0.575986,0.013115
600,0.1403,0.1304,0.210797,0.585343,0.019672
800,0.1312,0.122114,0.353883,0.662456,0.144262
1000,0.1219,0.112941,0.425837,0.699553,0.170492
1200,0.113,0.105452,0.453901,0.716147,0.163934
1400,0.1058,0.09941,0.493827,0.727313,0.196721
1600,0.0998,0.094272,0.529791,0.750058,0.219672
1800,0.0948,0.090548,0.539361,0.762714,0.239344
2000,0.0925,0.087779,0.555645,0.764439,0.242623


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
200,0.4792,0.237557,0.080913,0.575379,0.0
400,0.1758,0.143919,0.175385,0.570929,0.009585
600,0.1402,0.135977,0.183051,0.569484,0.01278
800,0.1337,0.130173,0.255183,0.607368,0.079872
1000,0.1273,0.122621,0.363636,0.651991,0.127796
1200,0.1179,0.114443,0.421561,0.685779,0.153355
1400,0.1102,0.107613,0.462963,0.713491,0.15655
1600,0.1041,0.102628,0.492163,0.724739,0.175719
1800,0.0979,0.098102,0.516418,0.747568,0.185304
2000,0.0942,0.095276,0.521933,0.751266,0.194888


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
200,0.4844,0.238638,0.075131,0.567248,0.0
400,0.1768,0.140853,0.155449,0.562211,0.0
600,0.1398,0.131897,0.164479,0.562606,0.009836
800,0.1311,0.123627,0.324022,0.648002,0.108197
1000,0.1218,0.114805,0.402597,0.685045,0.137705
1200,0.1121,0.107343,0.43418,0.710802,0.15082
1400,0.1049,0.10156,0.462783,0.71574,0.157377
1600,0.0987,0.09698,0.482977,0.730415,0.167213
1800,0.0938,0.093262,0.503115,0.744441,0.177049
2000,0.0918,0.090586,0.528505,0.760277,0.180328


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
200,0.4793,0.23719,0.077614,0.568792,0.0
400,0.1778,0.142205,0.156372,0.562547,0.00641
600,0.1399,0.133307,0.162829,0.563415,0.009615
800,0.1344,0.127589,0.261843,0.606715,0.092949
1000,0.127,0.119548,0.365642,0.667047,0.11859
1200,0.1181,0.111856,0.441202,0.689526,0.137821
1400,0.1112,0.104625,0.487116,0.716954,0.141026
1600,0.103,0.09941,0.505728,0.729089,0.166667
1800,0.0989,0.095508,0.537954,0.742776,0.176282
2000,0.0949,0.092769,0.556106,0.751463,0.195513


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
200,0.4845,0.240511,0.077677,0.563989,0.0
400,0.1765,0.143814,0.176935,0.57037,0.006494
600,0.1395,0.134167,0.191972,0.572034,0.019481
800,0.13,0.124813,0.359846,0.662722,0.116883
1000,0.1214,0.114935,0.43379,0.702236,0.136364
1200,0.1122,0.10708,0.48125,0.720845,0.155844
1400,0.1047,0.100707,0.512896,0.749575,0.159091
1600,0.0993,0.09578,0.54784,0.756591,0.162338
1800,0.094,0.091912,0.568821,0.770706,0.178571
2000,0.0897,0.089463,0.584687,0.774372,0.194805


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
200,0.4802,0.237426,0.077073,0.565179,0.0
400,0.1778,0.14317,0.160126,0.563935,0.009646
600,0.1406,0.133919,0.1595,0.563803,0.009646
800,0.1352,0.127934,0.246039,0.592831,0.099678
1000,0.1263,0.119437,0.386489,0.677186,0.115756
1200,0.119,0.110803,0.442636,0.698791,0.128617
1400,0.1115,0.104207,0.495604,0.728089,0.157556
1600,0.1048,0.098666,0.524073,0.744965,0.167203
1800,0.0997,0.094573,0.535385,0.756674,0.167203
2000,0.0945,0.091521,0.553544,0.770927,0.163987


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias', 'pre_classifier.weight', 'pre_classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss,F1,Roc Auc,Accuracy
200,0.4789,0.235342,0.081107,0.579582,0.0
400,0.1765,0.142114,0.161989,0.564857,0.009934
600,0.1403,0.133721,0.163035,0.565075,0.009934
800,0.1335,0.127894,0.264865,0.605604,0.129139
1000,0.1271,0.119429,0.400998,0.678754,0.142384
1200,0.1179,0.111185,0.439105,0.697933,0.15894
1400,0.1104,0.104241,0.483165,0.716155,0.182119
1600,0.1031,0.098903,0.513036,0.730615,0.198675
1800,0.0986,0.094453,0.531932,0.748624,0.208609
2000,0.0945,0.09156,0.571429,0.765852,0.228477


eval_loss 0.0880474478006363
eval_f1 0.5691238609274476
eval_roc_auc 0.7722500363992733
eval_accuracy 0.21476230624151418
eval_runtime 0.15300999999999998
eval_samples_per_second 2022.3902999999998
eval_steps_per_second 19.7069
epoch 15.0
