SC : https://jesusleal.io/2020/10/20/RoBERTA-Text-Classification/

https://github.com/jlealtru/website_tutorials/blob/main/notebooks/RoBERTA%20with%20IMDB.ipynb

In [1]:
import pandas as pd
import datasets
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification,Trainer, TrainingArguments
import torch.nn as nn
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tqdm import tqdm
import wandb
import os


In [2]:
train_data, test_data = datasets.load_dataset('imdb', split =['train', 'test'])

In [3]:
train_df = pd.DataFrame(train_data)
test_df = pd.DataFrame(test_data)

In [4]:
# Display the first few rows of the DataFrame
train_df

Unnamed: 0,text,label
0,I rented I AM CURIOUS-YELLOW from my video sto...,0
1,"""I Am Curious: Yellow"" is a risible and preten...",0
2,If only to avoid making this type of film in t...,0
3,This film was probably inspired by Godard's Ma...,0
4,"Oh, brother...after hearing about this ridicul...",0
...,...,...
24995,A hit at the time but now better categorised a...,1
24996,I love this movie like no other. Another time ...,1
24997,This film and it's sequel Barry Mckenzie holds...,1
24998,'The Adventures Of Barry McKenzie' started lif...,1


In [7]:
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from collections import Counter

# Load dataset
data = train_df

# Load stopwords
stop_words = set(stopwords.words('english'))

# Preprocessing and tokenization function
def preprocess(text):
    words = word_tokenize(text.lower())  # Tokenization and lowercasing
    words = [word for word in words if word.isalpha() and word not in stop_words]  # Remove non-alphabet and stopwords
    return words

# Separate data based on labels
label_0_data = data[data['label'] == 0]
label_1_data = data[data['label'] == 1]

# Count word frequencies
label_0_words = []
label_1_words = []

for text in label_0_data['text']:
    label_0_words.extend(preprocess(text))
    
for text in label_1_data['text']:
    label_1_words.extend(preprocess(text))

# Calculate word frequencies
label_0_word_freq = Counter(label_0_words)
label_1_word_freq = Counter(label_1_words)

# Get most common words
n_most_common = 10  # You can adjust this value
most_common_label_0 = label_0_word_freq.most_common(n_most_common)
most_common_label_1 = label_1_word_freq.most_common(n_most_common)

print("Most common words for Label 0:", most_common_label_0)
print("Most common words for Label 1:", most_common_label_1)

Most common words for Label 0: [('br', 52636), ('movie', 24097), ('film', 18474), ('one', 12614), ('like', 10967), ('would', 7672), ('even', 7664), ('good', 7206), ('bad', 7139), ('really', 6240)]
Most common words for Label 1: [('br', 49235), ('film', 20284), ('movie', 18498), ('one', 13279), ('like', 8778), ('good', 7452), ('story', 6562), ('great', 6327), ('time', 6051), ('see', 5872)]


In [19]:
# load model and tokenizer and define length of the text sequence
model = RobertaForSequenceClassification.from_pretrained('roberta-base')
tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base', max_length = 512)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
# define a function that will tokenize the model, and will return the relevant inputs for the model
def tokenization(batched_text):
    return tokenizer(batched_text['text'], padding = True, truncation=True)


train_data = train_data.map(tokenization, batched = True, batch_size = len(train_data))
test_data = test_data.map(tokenization, batched = True, batch_size = len(test_data))


In [21]:
train_data.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
test_data.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])


In [22]:
train_data

Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 25000
})

In [23]:
train_df = pd.DataFrame(train_data)
test_df = pd.DataFrame(test_data)

In [24]:
train_df

Unnamed: 0,label,input_ids,attention_mask
0,tensor(0),"[tensor(0), tensor(100), tensor(16425), tensor...","[tensor(1), tensor(1), tensor(1), tensor(1), t..."
1,tensor(0),"[tensor(0), tensor(113), tensor(100), tensor(1...","[tensor(1), tensor(1), tensor(1), tensor(1), t..."
2,tensor(0),"[tensor(0), tensor(1106), tensor(129), tensor(...","[tensor(1), tensor(1), tensor(1), tensor(1), t..."
3,tensor(0),"[tensor(0), tensor(713), tensor(822), tensor(2...","[tensor(1), tensor(1), tensor(1), tensor(1), t..."
4,tensor(0),"[tensor(0), tensor(7516), tensor(6), tensor(21...","[tensor(1), tensor(1), tensor(1), tensor(1), t..."
...,...,...,...
24995,tensor(1),"[tensor(0), tensor(250), tensor(478), tensor(2...","[tensor(1), tensor(1), tensor(1), tensor(1), t..."
24996,tensor(1),"[tensor(0), tensor(100), tensor(657), tensor(4...","[tensor(1), tensor(1), tensor(1), tensor(1), t..."
24997,tensor(1),"[tensor(0), tensor(713), tensor(822), tensor(8...","[tensor(1), tensor(1), tensor(1), tensor(1), t..."
24998,tensor(1),"[tensor(0), tensor(108), tensor(133), tensor(3...","[tensor(1), tensor(1), tensor(1), tensor(1), t..."


Trainer Helper Class

In [11]:
# define accuracy metrics
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }


In [12]:
# define the training arguments
training_args = TrainingArguments(
    output_dir = 'results',
    num_train_epochs=3,
    per_device_train_batch_size = 4,
    gradient_accumulation_steps = 16,    
    per_device_eval_batch_size= 8,
    evaluation_strategy = "epoch",
    save_strategy="epoch",              # addition
    disable_tqdm = False, 
    load_best_model_at_end=True,
    warmup_steps=500,
    weight_decay=0.01,
    logging_steps = 8,
    fp16 = True,
    logging_dir='/media/jlealtru/data_files/github/website_tutorials/logs',
    dataloader_num_workers = 4,
    run_name = 'roberta-classification'
)


In [13]:
# instantiate the trainer class and check for available devices
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_data,
    eval_dataset=test_data
)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device


'cuda'

In [15]:
# train the model
trainer.train()


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

  1%|          | 8/1170 [00:30<50:59,  2.63s/it]  

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.02}


  1%|▏         | 16/1170 [00:49<47:57,  2.49s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.04}


  2%|▏         | 20/1170 [00:59<48:57,  2.55s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.4 seconds.), retrying request
  2%|▏         | 24/1170 [01:10<49:44,  2.60s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.06}


  2%|▏         | 25/1170 [01:12<49:05,  2.57s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
  3%|▎         | 31/1170 [01:28<48:03,  2.53s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.5 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.5 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.0 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.1 seconds.), retrying request
  3%|▎         | 32/1170 [01:30<47:55,  2.53s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.08}


  3%|▎         | 37/1170 [01:43<48:36,  2.57s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.1 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.6 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
  3%|▎         | 40/1170 [01:51<48:52,  2.60s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.1}


  4%|▎         | 43/1170 [01:59<50:12,  2.67s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.2 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.4 seconds.), retrying request
  4%|▍         | 48/1170 [02:13<51:01,  2.73s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.12}


[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.0 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 5.0 seconds.), retrying request
  5%|▍         | 54/1170 [02:28<48:59,  2.63s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.7 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.1 seconds.), retrying request
  5%|▍         | 56/1170 [02:34<50:49,  2.74s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.14}


  5%|▌         | 64/1170 [02:56<49:30,  2.69s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.16}


  6%|▌         | 65/1170 [02:58<49:22,  2.68s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
  6%|▌         | 70/1170 [03:12<51:10,  2.79s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.9 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.2 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.1 seconds.), retrying request
  6%|▌         | 72/1170 [03:18<52:49,  2.89s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.18}


  6%|▋         | 75/1170 [03:27<55:07,  3.02s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.9 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.2 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.7 seconds.), retrying request
  7%|▋         | 80/1170 [03:42<53:12,  2.93s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.2}


[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.3 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.9 seconds.), retrying request
  7%|▋         | 86/1170 [03:59<54:07,  3.00s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.2 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.9 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.2 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.2 seconds.), retrying request
  8%|▊         | 88/1170 [04:06<55:25,  3.07s/i

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.23}


  8%|▊         | 90/1170 [04:12<54:10,  3.01s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
  8%|▊         | 96/1170 [04:30<55:20,  3.09s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.25}


  9%|▉         | 104/1170 [04:53<50:41,  2.85s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.27}


 10%|▉         | 112/1170 [05:16<51:18,  2.91s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.29}


 10%|▉         | 116/1170 [05:28<50:58,  2.90s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.6 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 8.1 seconds.), retrying request
 10%|█         | 120/1170 [05:39<50:56,  2.91s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.31}


 11%|█         | 126/1170 [05:58<55:10,  3.17s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.2 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.7 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 9.6 seconds.), retrying request
 11%|█         | 128/1170 [06:04<55:13,  3.18s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.33}


 12%|█▏        | 136/1170 [06:30<54:22,  3.16s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.1 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.9 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.6 seconds.), retrying request


{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.35}


 12%|█▏        | 140/1170 [06:42<54:11,  3.16s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 8.0 seconds.), retrying request
 12%|█▏        | 144/1170 [06:55<54:09,  3.17s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.37}


 12%|█▏        | 145/1170 [06:58<54:52,  3.21s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.2 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.0 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.0 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.3 seconds.), retrying request
 13%|█▎        | 150/1170 [07:15<54:55,  3.23s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 9.6 seconds.), retrying request
 13%|█▎        | 152/1170 [07:21<53:51,  3.17s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.39}


 13%|█▎        | 154/1170 [07:27<53:18,  3.15s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.4 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.6 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 9.2 seconds.), retrying request
 14%|█▎        | 160/1170 [07:46<52:30,  3.12s/it]

{'loss': 0.0, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.41}


 14%|█▍        | 164/1170 [07:58<52:14,  3.12s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.0 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.7 seconds.), retrying request
 14%|█▍        | 168/1170 [08:11<52:14,  3.13s/it]

{'loss': 0.0, 'learning_rate': 3.0000000000000004e-07, 'epoch': 0.43}


 14%|█▍        | 169/1170 [08:14<52:14,  3.13s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 8.9 seconds.), retrying request
 15%|█▍        | 173/1170 [08:27<53:59,  3.25s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.3 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.3 seconds.), retrying request
 15%|█▌        | 176/1170 [08:37<53:24,  3.22s/it]

{'loss': 0.0, 'learning_rate': 1.1e-06, 'epoch': 0.45}


 15%|█▌        | 178/1170 [08:43<52:57,  3.20s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 9.9 seconds.), retrying request
 16%|█▌        | 183/1170 [08:59<52:45,  3.21s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.5 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 5.0 seconds.), retrying request
 16%|█▌        | 184/1170 [09:02<52:36,  3.20s/it]

{'loss': 0.0, 'learning_rate': 1.9e-06, 'epoch': 0.47}


 16%|█▌        | 187/1170 [09:12<52:36,  3.21s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.1 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.6 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 9.2 seconds.), retrying request
 16%|█▋        | 192/1170 [09:28<52:01,  3.19s/it]

{'loss': 0.0, 'learning_rate': 2.7e-06, 'epoch': 0.49}


[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.4 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.6 seconds.), retrying request
 17%|█▋        | 197/1170 [09:44<51:33,  3.18s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 8.7 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.5 seconds.), retrying request
 17%|█▋        | 200/1170 [09:53<51:26,  3.18s/it]

{'loss': 0.0, 'learning_rate': 3.5000000000000004e-06, 'epoch': 0.51}


 17%|█▋        | 202/1170 [10:00<51:19,  3.18s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.6 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 8.6 seconds.), retrying request
 18%|█▊        | 206/1170 [10:12<51:05,  3.18s/it][34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 2.4 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 4.4 seconds.), retrying request
[34m[1mwandb[0m: 429 encountered (Filestream rate limit exceeded, retrying in 9.9 seconds.), retrying request
 18%|█▊        | 208/1170 [10:19<50:57,  3.18s/it]

{'loss': 0.0, 'learning_rate': 4.2999999999999995e-06, 'epoch': 0.53}


 18%|█▊        | 216/1170 [10:45<50:50,  3.20s/it]

{'loss': 0.0, 'learning_rate': 5.1e-06, 'epoch': 0.55}


 19%|█▉        | 224/1170 [11:10<51:05,  3.24s/it]

{'loss': 0.0, 'learning_rate': 5.9e-06, 'epoch': 0.57}


 20%|█▉        | 232/1170 [11:35<48:40,  3.11s/it]

{'loss': 0.0, 'learning_rate': 6.700000000000001e-06, 'epoch': 0.59}


 21%|██        | 240/1170 [12:00<48:13,  3.11s/it]

{'loss': 0.0, 'learning_rate': 7.5e-06, 'epoch': 0.61}


 21%|██        | 248/1170 [12:25<47:47,  3.11s/it]

{'loss': 0.0, 'learning_rate': 8.3e-06, 'epoch': 0.63}


 22%|██▏       | 256/1170 [12:50<47:25,  3.11s/it]

{'loss': 0.0, 'learning_rate': 9.100000000000001e-06, 'epoch': 0.66}


 23%|██▎       | 264/1170 [13:16<50:36,  3.35s/it]

{'loss': 0.0, 'learning_rate': 9.900000000000002e-06, 'epoch': 0.68}


 23%|██▎       | 272/1170 [13:41<48:15,  3.22s/it]

{'loss': 0.0, 'learning_rate': 1.0700000000000001e-05, 'epoch': 0.7}


 24%|██▍       | 280/1170 [14:06<47:04,  3.17s/it]

{'loss': 0.0, 'learning_rate': 1.1500000000000002e-05, 'epoch': 0.72}


 25%|██▍       | 288/1170 [14:32<47:08,  3.21s/it]

{'loss': 0.0, 'learning_rate': 1.23e-05, 'epoch': 0.74}


 25%|██▌       | 296/1170 [14:58<46:26,  3.19s/it]

{'loss': 0.0, 'learning_rate': 1.3100000000000002e-05, 'epoch': 0.76}


 26%|██▌       | 304/1170 [15:23<45:25,  3.15s/it]

{'loss': 0.0, 'learning_rate': 1.3900000000000002e-05, 'epoch': 0.78}


 27%|██▋       | 312/1170 [15:48<45:08,  3.16s/it]

{'loss': 0.0, 'learning_rate': 1.47e-05, 'epoch': 0.8}


 27%|██▋       | 320/1170 [16:15<48:16,  3.41s/it]

{'loss': 0.0, 'learning_rate': 1.55e-05, 'epoch': 0.82}


 28%|██▊       | 328/1170 [16:41<44:23,  3.16s/it]

{'loss': 0.0, 'learning_rate': 1.63e-05, 'epoch': 0.84}


 29%|██▊       | 336/1170 [17:06<43:26,  3.12s/it]

{'loss': 0.0, 'learning_rate': 1.7100000000000002e-05, 'epoch': 0.86}


 29%|██▉       | 344/1170 [17:31<42:43,  3.10s/it]

{'loss': 0.0, 'learning_rate': 1.79e-05, 'epoch': 0.88}


 30%|███       | 352/1170 [17:55<41:06,  3.01s/it]

{'loss': 0.0, 'learning_rate': 1.87e-05, 'epoch': 0.9}


 31%|███       | 360/1170 [18:20<42:08,  3.12s/it]

{'loss': 0.0, 'learning_rate': 1.9500000000000003e-05, 'epoch': 0.92}


 31%|███▏      | 368/1170 [18:45<42:05,  3.15s/it]

{'loss': 0.0, 'learning_rate': 2.0300000000000002e-05, 'epoch': 0.94}


 32%|███▏      | 376/1170 [19:11<41:35,  3.14s/it]

{'loss': 0.0, 'learning_rate': 2.11e-05, 'epoch': 0.96}


 33%|███▎      | 384/1170 [19:36<41:10,  3.14s/it]

{'loss': 0.0, 'learning_rate': 2.19e-05, 'epoch': 0.98}


  _warn_prf(average, modifier, msg_start, len(result))
                                                  
 33%|███▎      | 390/1170 [27:14<41:54,  3.22s/it] 

{'eval_loss': nan, 'eval_accuracy': 0.5, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 435.1078, 'eval_samples_per_second': 57.457, 'eval_steps_per_second': 7.182, 'epoch': 1.0}


 34%|███▎      | 392/1170 [27:38<21:22:56, 98.94s/it] 

{'loss': 0.0, 'learning_rate': 2.2700000000000003e-05, 'epoch': 1.0}


 34%|███▍      | 400/1170 [28:00<1:47:15,  8.36s/it] 

{'loss': 0.0, 'learning_rate': 2.35e-05, 'epoch': 1.02}


 35%|███▍      | 408/1170 [28:24<43:27,  3.42s/it]  

{'loss': 0.0, 'learning_rate': 2.43e-05, 'epoch': 1.04}


 36%|███▌      | 416/1170 [28:49<39:23,  3.14s/it]

{'loss': 0.0, 'learning_rate': 2.51e-05, 'epoch': 1.06}


 36%|███▌      | 424/1170 [29:14<38:52,  3.13s/it]

{'loss': 0.0, 'learning_rate': 2.5900000000000003e-05, 'epoch': 1.09}


 37%|███▋      | 432/1170 [29:39<38:18,  3.11s/it]

{'loss': 0.0, 'learning_rate': 2.6700000000000002e-05, 'epoch': 1.11}


 38%|███▊      | 440/1170 [30:05<40:31,  3.33s/it]

{'loss': 0.0, 'learning_rate': 2.7500000000000004e-05, 'epoch': 1.13}


 38%|███▊      | 448/1170 [30:31<39:20,  3.27s/it]

{'loss': 0.0, 'learning_rate': 2.83e-05, 'epoch': 1.15}


 39%|███▉      | 456/1170 [30:57<38:55,  3.27s/it]

{'loss': 0.0, 'learning_rate': 2.91e-05, 'epoch': 1.17}


 40%|███▉      | 464/1170 [31:23<37:39,  3.20s/it]

{'loss': 0.0, 'learning_rate': 2.9900000000000002e-05, 'epoch': 1.19}


 40%|████      | 472/1170 [31:48<36:18,  3.12s/it]

{'loss': 0.0, 'learning_rate': 3.07e-05, 'epoch': 1.21}


 41%|████      | 480/1170 [32:13<35:47,  3.11s/it]

{'loss': 0.0, 'learning_rate': 3.15e-05, 'epoch': 1.23}


 42%|████▏     | 488/1170 [32:38<35:40,  3.14s/it]

{'loss': 0.0, 'learning_rate': 3.2300000000000006e-05, 'epoch': 1.25}


 42%|████▏     | 496/1170 [33:03<35:11,  3.13s/it]

{'loss': 0.0, 'learning_rate': 3.3100000000000005e-05, 'epoch': 1.27}


 43%|████▎     | 504/1170 [33:28<34:51,  3.14s/it]

{'loss': 0.0, 'learning_rate': 3.3900000000000004e-05, 'epoch': 1.29}


 44%|████▍     | 512/1170 [33:54<34:25,  3.14s/it]

{'loss': 0.0, 'learning_rate': 3.4699999999999996e-05, 'epoch': 1.31}


 44%|████▍     | 520/1170 [34:19<33:52,  3.13s/it]

{'loss': 0.0, 'learning_rate': 3.55e-05, 'epoch': 1.33}


 45%|████▌     | 528/1170 [34:44<33:31,  3.13s/it]

{'loss': 0.0, 'learning_rate': 3.63e-05, 'epoch': 1.35}


 46%|████▌     | 536/1170 [35:09<33:14,  3.15s/it]

{'loss': 0.0, 'learning_rate': 3.71e-05, 'epoch': 1.37}


 46%|████▋     | 544/1170 [35:34<32:47,  3.14s/it]

{'loss': 0.0, 'learning_rate': 3.79e-05, 'epoch': 1.39}


 47%|████▋     | 552/1170 [35:59<32:26,  3.15s/it]

{'loss': 0.0, 'learning_rate': 3.8700000000000006e-05, 'epoch': 1.41}


 48%|████▊     | 560/1170 [36:24<31:52,  3.14s/it]

{'loss': 0.0, 'learning_rate': 3.9500000000000005e-05, 'epoch': 1.43}


 49%|████▊     | 568/1170 [36:49<31:31,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.0300000000000004e-05, 'epoch': 1.45}


 49%|████▉     | 576/1170 [37:14<31:03,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.11e-05, 'epoch': 1.47}


 50%|████▉     | 584/1170 [37:41<31:36,  3.24s/it]

{'loss': 0.0, 'learning_rate': 4.19e-05, 'epoch': 1.5}


 51%|█████     | 592/1170 [38:06<30:15,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.27e-05, 'epoch': 1.52}


 51%|█████▏    | 600/1170 [38:31<29:52,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.35e-05, 'epoch': 1.54}


 52%|█████▏    | 608/1170 [38:56<29:21,  3.13s/it]

{'loss': 0.0, 'learning_rate': 4.43e-05, 'epoch': 1.56}


 53%|█████▎    | 616/1170 [39:21<29:08,  3.16s/it]

{'loss': 0.0, 'learning_rate': 4.5100000000000005e-05, 'epoch': 1.58}


 53%|█████▎    | 624/1170 [39:46<28:31,  3.13s/it]

{'loss': 0.0, 'learning_rate': 4.5900000000000004e-05, 'epoch': 1.6}


 54%|█████▍    | 632/1170 [40:11<28:06,  3.13s/it]

{'loss': 0.0, 'learning_rate': 4.6700000000000003e-05, 'epoch': 1.62}


 55%|█████▍    | 640/1170 [40:36<27:42,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.75e-05, 'epoch': 1.64}


 55%|█████▌    | 648/1170 [41:02<27:18,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.83e-05, 'epoch': 1.66}


 56%|█████▌    | 656/1170 [41:27<26:49,  3.13s/it]

{'loss': 0.0, 'learning_rate': 4.91e-05, 'epoch': 1.68}


 57%|█████▋    | 664/1170 [41:52<26:31,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.99e-05, 'epoch': 1.7}


 57%|█████▋    | 672/1170 [42:17<26:05,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.9477611940298504e-05, 'epoch': 1.72}


 58%|█████▊    | 680/1170 [42:42<25:43,  3.15s/it]

{'loss': 0.0, 'learning_rate': 4.888059701492538e-05, 'epoch': 1.74}


 59%|█████▉    | 688/1170 [43:07<25:15,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.8283582089552244e-05, 'epoch': 1.76}


 59%|█████▉    | 696/1170 [43:32<24:47,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.768656716417911e-05, 'epoch': 1.78}


 60%|██████    | 704/1170 [43:58<24:26,  3.15s/it]

{'loss': 0.0, 'learning_rate': 4.708955223880597e-05, 'epoch': 1.8}


 61%|██████    | 712/1170 [44:23<23:57,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.6492537313432837e-05, 'epoch': 1.82}


 62%|██████▏   | 720/1170 [44:48<23:35,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.58955223880597e-05, 'epoch': 1.84}


 62%|██████▏   | 728/1170 [45:13<23:05,  3.13s/it]

{'loss': 0.0, 'learning_rate': 4.529850746268657e-05, 'epoch': 1.86}


 63%|██████▎   | 736/1170 [45:38<22:46,  3.15s/it]

{'loss': 0.0, 'learning_rate': 4.4701492537313436e-05, 'epoch': 1.88}


 64%|██████▎   | 744/1170 [46:03<22:21,  3.15s/it]

{'loss': 0.0, 'learning_rate': 4.4104477611940296e-05, 'epoch': 1.9}


 64%|██████▍   | 752/1170 [46:28<21:42,  3.12s/it]

{'loss': 0.0, 'learning_rate': 4.350746268656717e-05, 'epoch': 1.93}


 65%|██████▍   | 760/1170 [46:53<21:32,  3.15s/it]

{'loss': 0.0, 'learning_rate': 4.2910447761194036e-05, 'epoch': 1.95}


 66%|██████▌   | 768/1170 [47:19<21:07,  3.15s/it]

{'loss': 0.0, 'learning_rate': 4.2313432835820895e-05, 'epoch': 1.97}


 66%|██████▋   | 776/1170 [47:44<20:36,  3.14s/it]

{'loss': 0.0, 'learning_rate': 4.171641791044776e-05, 'epoch': 1.99}


  _warn_prf(average, modifier, msg_start, len(result))
                                                  
 67%|██████▋   | 781/1170 [54:58<20:13,  3.12s/it] 

{'eval_loss': nan, 'eval_accuracy': 0.5, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 415.9972, 'eval_samples_per_second': 60.097, 'eval_steps_per_second': 7.512, 'epoch': 2.0}


 67%|██████▋   | 784/1170 [55:29<7:14:43, 67.57s/it]  

{'loss': 0.0, 'learning_rate': 4.111940298507463e-05, 'epoch': 2.01}


 68%|██████▊   | 792/1170 [55:50<40:32,  6.43s/it]  

{'loss': 0.0, 'learning_rate': 4.0522388059701495e-05, 'epoch': 2.03}


 68%|██████▊   | 800/1170 [56:13<19:37,  3.18s/it]

{'loss': 0.0, 'learning_rate': 3.992537313432836e-05, 'epoch': 2.05}


 69%|██████▉   | 808/1170 [56:38<18:36,  3.08s/it]

{'loss': 0.0, 'learning_rate': 3.932835820895522e-05, 'epoch': 2.07}


 70%|██████▉   | 816/1170 [57:03<18:09,  3.08s/it]

{'loss': 0.0, 'learning_rate': 3.873134328358209e-05, 'epoch': 2.09}


 70%|███████   | 824/1170 [57:28<18:30,  3.21s/it]

{'loss': 0.0, 'learning_rate': 3.813432835820896e-05, 'epoch': 2.11}


 71%|███████   | 832/1170 [57:52<17:22,  3.08s/it]

{'loss': 0.0, 'learning_rate': 3.753731343283583e-05, 'epoch': 2.13}


 72%|███████▏  | 840/1170 [58:17<16:50,  3.06s/it]

{'loss': 0.0, 'learning_rate': 3.694029850746269e-05, 'epoch': 2.15}


 72%|███████▏  | 848/1170 [58:41<16:27,  3.07s/it]

{'loss': 0.0, 'learning_rate': 3.6343283582089554e-05, 'epoch': 2.17}


 73%|███████▎  | 856/1170 [59:06<15:58,  3.05s/it]

{'loss': 0.0, 'learning_rate': 3.574626865671642e-05, 'epoch': 2.19}


 74%|███████▍  | 864/1170 [59:30<15:33,  3.05s/it]

{'loss': 0.0, 'learning_rate': 3.514925373134329e-05, 'epoch': 2.21}


 75%|███████▍  | 872/1170 [59:55<15:10,  3.05s/it]

{'loss': 0.0, 'learning_rate': 3.455223880597015e-05, 'epoch': 2.23}


 75%|███████▌  | 880/1170 [1:00:19<14:44,  3.05s/it]

{'loss': 0.0, 'learning_rate': 3.395522388059701e-05, 'epoch': 2.25}


 76%|███████▌  | 888/1170 [1:00:44<14:22,  3.06s/it]

{'loss': 0.0, 'learning_rate': 3.335820895522388e-05, 'epoch': 2.27}


 77%|███████▋  | 896/1170 [1:01:08<13:58,  3.06s/it]

{'loss': 0.0, 'learning_rate': 3.276119402985075e-05, 'epoch': 2.29}


 77%|███████▋  | 904/1170 [1:01:33<13:34,  3.06s/it]

{'loss': 0.0, 'learning_rate': 3.216417910447761e-05, 'epoch': 2.31}


 78%|███████▊  | 912/1170 [1:01:57<13:11,  3.07s/it]

{'loss': 0.0, 'learning_rate': 3.156716417910448e-05, 'epoch': 2.33}


 79%|███████▊  | 920/1170 [1:02:22<12:46,  3.07s/it]

{'loss': 0.0, 'learning_rate': 3.0970149253731346e-05, 'epoch': 2.36}


 79%|███████▉  | 928/1170 [1:02:46<12:27,  3.09s/it]

{'loss': 0.0, 'learning_rate': 3.037313432835821e-05, 'epoch': 2.38}


 80%|████████  | 936/1170 [1:03:10<11:29,  2.95s/it]

{'loss': 0.0, 'learning_rate': 2.9776119402985076e-05, 'epoch': 2.4}


 81%|████████  | 944/1170 [1:03:36<11:43,  3.11s/it]

{'loss': 0.0, 'learning_rate': 2.917910447761194e-05, 'epoch': 2.42}


 81%|████████▏ | 952/1170 [1:04:01<11:23,  3.13s/it]

{'loss': 0.0, 'learning_rate': 2.8582089552238805e-05, 'epoch': 2.44}


 82%|████████▏ | 960/1170 [1:04:26<11:19,  3.24s/it]

{'loss': 0.0, 'learning_rate': 2.7985074626865672e-05, 'epoch': 2.46}


 83%|████████▎ | 968/1170 [1:04:51<10:35,  3.15s/it]

{'loss': 0.0, 'learning_rate': 2.7388059701492542e-05, 'epoch': 2.48}


 83%|████████▎ | 976/1170 [1:05:17<10:14,  3.17s/it]

{'loss': 0.0, 'learning_rate': 2.6791044776119405e-05, 'epoch': 2.5}


 84%|████████▍ | 984/1170 [1:05:42<09:44,  3.14s/it]

{'loss': 0.0, 'learning_rate': 2.619402985074627e-05, 'epoch': 2.52}


 85%|████████▍ | 992/1170 [1:06:07<09:22,  3.16s/it]

{'loss': 0.0, 'learning_rate': 2.5597014925373135e-05, 'epoch': 2.54}


 85%|████████▌ | 1000/1170 [1:06:32<08:53,  3.14s/it]

{'loss': 0.0, 'learning_rate': 2.5e-05, 'epoch': 2.56}


 86%|████████▌ | 1008/1170 [1:06:57<08:31,  3.16s/it]

{'loss': 0.0, 'learning_rate': 2.4402985074626868e-05, 'epoch': 2.58}


 87%|████████▋ | 1016/1170 [1:07:23<08:03,  3.14s/it]

{'loss': 0.0, 'learning_rate': 2.380597014925373e-05, 'epoch': 2.6}


 88%|████████▊ | 1024/1170 [1:07:48<07:40,  3.15s/it]

{'loss': 0.0, 'learning_rate': 2.3208955223880597e-05, 'epoch': 2.62}


 88%|████████▊ | 1032/1170 [1:08:13<07:13,  3.14s/it]

{'loss': 0.0, 'learning_rate': 2.2611940298507464e-05, 'epoch': 2.64}


 89%|████████▉ | 1040/1170 [1:08:38<06:49,  3.15s/it]

{'loss': 0.0, 'learning_rate': 2.201492537313433e-05, 'epoch': 2.66}


 90%|████████▉ | 1048/1170 [1:09:03<06:22,  3.14s/it]

{'loss': 0.0, 'learning_rate': 2.1417910447761194e-05, 'epoch': 2.68}


 90%|█████████ | 1056/1170 [1:09:29<05:59,  3.15s/it]

{'loss': 0.0, 'learning_rate': 2.0820895522388064e-05, 'epoch': 2.7}


 91%|█████████ | 1064/1170 [1:09:54<05:32,  3.14s/it]

{'loss': 0.0, 'learning_rate': 2.0223880597014927e-05, 'epoch': 2.72}


 92%|█████████▏| 1072/1170 [1:10:19<05:07,  3.14s/it]

{'loss': 0.0, 'learning_rate': 1.9626865671641793e-05, 'epoch': 2.74}


 92%|█████████▏| 1080/1170 [1:10:44<04:42,  3.14s/it]

{'loss': 0.0, 'learning_rate': 1.9029850746268656e-05, 'epoch': 2.76}


 93%|█████████▎| 1088/1170 [1:11:09<04:17,  3.14s/it]

{'loss': 0.0, 'learning_rate': 1.8432835820895523e-05, 'epoch': 2.79}


 94%|█████████▎| 1096/1170 [1:11:34<03:52,  3.14s/it]

{'loss': 0.0, 'learning_rate': 1.783582089552239e-05, 'epoch': 2.81}


 94%|█████████▍| 1104/1170 [1:11:59<03:27,  3.15s/it]

{'loss': 0.0, 'learning_rate': 1.7238805970149256e-05, 'epoch': 2.83}


 95%|█████████▌| 1112/1170 [1:12:25<03:02,  3.15s/it]

{'loss': 0.0, 'learning_rate': 1.664179104477612e-05, 'epoch': 2.85}


 96%|█████████▌| 1120/1170 [1:12:50<02:37,  3.14s/it]

{'loss': 0.0, 'learning_rate': 1.6044776119402986e-05, 'epoch': 2.87}


 96%|█████████▋| 1128/1170 [1:13:15<02:12,  3.15s/it]

{'loss': 0.0, 'learning_rate': 1.5447761194029852e-05, 'epoch': 2.89}


 97%|█████████▋| 1136/1170 [1:13:40<01:46,  3.14s/it]

{'loss': 0.0, 'learning_rate': 1.4850746268656717e-05, 'epoch': 2.91}


 98%|█████████▊| 1144/1170 [1:14:05<01:21,  3.15s/it]

{'loss': 0.0, 'learning_rate': 1.4253731343283584e-05, 'epoch': 2.93}


 98%|█████████▊| 1152/1170 [1:14:31<00:56,  3.15s/it]

{'loss': 0.0, 'learning_rate': 1.3656716417910448e-05, 'epoch': 2.95}


 99%|█████████▉| 1160/1170 [1:14:57<00:33,  3.38s/it]

{'loss': 0.0, 'learning_rate': 1.3059701492537313e-05, 'epoch': 2.97}


100%|█████████▉| 1168/1170 [1:15:25<00:06,  3.41s/it]

{'loss': 0.0, 'learning_rate': 1.246268656716418e-05, 'epoch': 2.99}


  _warn_prf(average, modifier, msg_start, len(result))
                                                     
100%|██████████| 1170/1170 [1:22:46<00:00,  3.40s/it]

{'eval_loss': nan, 'eval_accuracy': 0.5, 'eval_f1': 0.0, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_runtime': 432.7112, 'eval_samples_per_second': 57.775, 'eval_steps_per_second': 7.222, 'epoch': 3.0}


100%|██████████| 1170/1170 [1:22:55<00:00,  4.25s/it]

{'train_runtime': 4991.868, 'train_samples_per_second': 15.024, 'train_steps_per_second': 0.234, 'train_loss': 0.0, 'epoch': 3.0}





TrainOutput(global_step=1170, training_loss=0.0, metrics={'train_runtime': 4991.868, 'train_samples_per_second': 15.024, 'train_steps_per_second': 0.234, 'train_loss': 0.0, 'epoch': 3.0})

In [14]:
import wandb
wandb.init()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mfaiqfawwazain[0m ([33mrouch[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…

In [16]:
config = wandb.config
config.logging_dir='logs'
config.dataloader_num_workers = 8,

    

In [17]:
# train the model
trainer.train()


  1%|          | 8/1170 [00:32<53:18,  2.75s/it]  

{'loss': 0.6984, 'learning_rate': 8.000000000000001e-07, 'epoch': 0.02}


  1%|▏         | 16/1170 [00:51<46:00,  2.39s/it]

{'loss': 0.7011, 'learning_rate': 1.6000000000000001e-06, 'epoch': 0.04}


  2%|▏         | 24/1170 [01:09<43:45,  2.29s/it]

{'loss': 0.6951, 'learning_rate': 2.3e-06, 'epoch': 0.06}


  3%|▎         | 32/1170 [01:28<43:49,  2.31s/it]

{'loss': 0.6913, 'learning_rate': 3.1e-06, 'epoch': 0.08}


  3%|▎         | 40/1170 [01:46<44:11,  2.35s/it]

{'loss': 0.6942, 'learning_rate': 3.9e-06, 'epoch': 0.1}


  4%|▍         | 48/1170 [02:06<45:21,  2.43s/it]

{'loss': 0.6898, 'learning_rate': 4.7e-06, 'epoch': 0.12}


  5%|▍         | 56/1170 [02:26<48:22,  2.61s/it]

{'loss': 0.6858, 'learning_rate': 5.500000000000001e-06, 'epoch': 0.14}


  5%|▌         | 64/1170 [02:48<48:58,  2.66s/it]

{'loss': 0.6824, 'learning_rate': 6.2e-06, 'epoch': 0.16}


  6%|▌         | 72/1170 [03:09<49:57,  2.73s/it]

{'loss': 0.6616, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.18}


  7%|▋         | 80/1170 [03:32<50:53,  2.80s/it]

{'loss': 0.5276, 'learning_rate': 7.8e-06, 'epoch': 0.2}


  8%|▊         | 88/1170 [03:54<50:35,  2.81s/it]

{'loss': 0.3129, 'learning_rate': 8.599999999999999e-06, 'epoch': 0.23}


  8%|▊         | 96/1170 [04:17<51:04,  2.85s/it]

{'loss': 0.2899, 'learning_rate': 9.4e-06, 'epoch': 0.25}


  9%|▉         | 104/1170 [04:40<51:51,  2.92s/it]

{'loss': 0.3232, 'learning_rate': 1.02e-05, 'epoch': 0.27}


 10%|▉         | 112/1170 [05:04<52:24,  2.97s/it]

{'loss': 0.246, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.29}


 10%|█         | 120/1170 [05:27<51:53,  2.96s/it]

{'loss': 0.19, 'learning_rate': 1.18e-05, 'epoch': 0.31}


 11%|█         | 128/1170 [05:52<53:07,  3.06s/it]

{'loss': 0.2745, 'learning_rate': 1.2600000000000001e-05, 'epoch': 0.33}


 12%|█▏        | 136/1170 [06:16<52:06,  3.02s/it]

{'loss': 0.2472, 'learning_rate': 1.3400000000000002e-05, 'epoch': 0.35}


 12%|█▏        | 144/1170 [06:42<56:03,  3.28s/it]

{'loss': 0.2227, 'learning_rate': 1.42e-05, 'epoch': 0.37}


 13%|█▎        | 152/1170 [07:07<55:13,  3.26s/it]

{'loss': 0.2035, 'learning_rate': 1.5e-05, 'epoch': 0.39}


 14%|█▎        | 160/1170 [07:32<52:40,  3.13s/it]

{'loss': 0.1846, 'learning_rate': 1.58e-05, 'epoch': 0.41}


 14%|█▍        | 168/1170 [07:57<52:01,  3.11s/it]

{'loss': 0.1915, 'learning_rate': 1.66e-05, 'epoch': 0.43}


 15%|█▌        | 176/1170 [08:24<56:11,  3.39s/it]

{'loss': 0.2057, 'learning_rate': 1.74e-05, 'epoch': 0.45}


 16%|█▌        | 184/1170 [08:51<54:26,  3.31s/it]

{'loss': 0.2207, 'learning_rate': 1.8200000000000002e-05, 'epoch': 0.47}


 16%|█▋        | 192/1170 [09:17<52:35,  3.23s/it]

{'loss': 0.1737, 'learning_rate': 1.9e-05, 'epoch': 0.49}


 17%|█▋        | 200/1170 [09:43<52:01,  3.22s/it]

{'loss': 0.1773, 'learning_rate': 1.9800000000000004e-05, 'epoch': 0.51}


 18%|█▊        | 208/1170 [10:09<53:10,  3.32s/it]

{'loss': 0.2716, 'learning_rate': 2.06e-05, 'epoch': 0.53}


 18%|█▊        | 216/1170 [10:36<54:09,  3.41s/it]

{'loss': 0.284, 'learning_rate': 2.1400000000000002e-05, 'epoch': 0.55}


 19%|█▉        | 224/1170 [11:03<53:43,  3.41s/it]

{'loss': 0.2449, 'learning_rate': 2.22e-05, 'epoch': 0.57}


 20%|█▉        | 232/1170 [11:31<53:17,  3.41s/it]

{'loss': 0.2444, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.59}


 21%|██        | 240/1170 [11:58<53:06,  3.43s/it]

{'loss': 0.2184, 'learning_rate': 2.38e-05, 'epoch': 0.61}


 21%|██        | 248/1170 [12:25<52:23,  3.41s/it]

{'loss': 0.2323, 'learning_rate': 2.46e-05, 'epoch': 0.63}


 22%|██▏       | 256/1170 [12:53<52:04,  3.42s/it]

{'loss': 0.2195, 'learning_rate': 2.54e-05, 'epoch': 0.66}


 23%|██▎       | 264/1170 [13:20<51:31,  3.41s/it]

{'loss': 0.1767, 'learning_rate': 2.6200000000000003e-05, 'epoch': 0.68}


 23%|██▎       | 272/1170 [13:48<50:58,  3.41s/it]

{'loss': 0.182, 'learning_rate': 2.7000000000000002e-05, 'epoch': 0.7}


 24%|██▍       | 280/1170 [14:15<50:36,  3.41s/it]

{'loss': 0.2026, 'learning_rate': 2.7800000000000005e-05, 'epoch': 0.72}


 25%|██▍       | 288/1170 [14:42<50:10,  3.41s/it]

{'loss': 0.1486, 'learning_rate': 2.86e-05, 'epoch': 0.74}


 25%|██▌       | 296/1170 [15:09<49:37,  3.41s/it]

{'loss': 0.1806, 'learning_rate': 2.94e-05, 'epoch': 0.76}


 26%|██▌       | 304/1170 [15:37<49:10,  3.41s/it]

{'loss': 0.1477, 'learning_rate': 3.02e-05, 'epoch': 0.78}


 27%|██▋       | 312/1170 [16:04<48:42,  3.41s/it]

{'loss': 0.2156, 'learning_rate': 3.1e-05, 'epoch': 0.8}


 27%|██▋       | 320/1170 [16:31<48:15,  3.41s/it]

{'loss': 0.2035, 'learning_rate': 3.18e-05, 'epoch': 0.82}


 28%|██▊       | 328/1170 [16:58<47:52,  3.41s/it]

{'loss': 0.1923, 'learning_rate': 3.26e-05, 'epoch': 0.84}


 29%|██▊       | 336/1170 [17:26<47:23,  3.41s/it]

{'loss': 0.1765, 'learning_rate': 3.3400000000000005e-05, 'epoch': 0.86}


 29%|██▉       | 344/1170 [17:53<47:02,  3.42s/it]

{'loss': 0.196, 'learning_rate': 3.4200000000000005e-05, 'epoch': 0.88}


 30%|███       | 352/1170 [18:20<46:26,  3.41s/it]

{'loss': 0.2622, 'learning_rate': 3.5e-05, 'epoch': 0.9}


 31%|███       | 360/1170 [18:48<46:09,  3.42s/it]

{'loss': 0.173, 'learning_rate': 3.58e-05, 'epoch': 0.92}


 31%|███▏      | 368/1170 [19:15<45:35,  3.41s/it]

{'loss': 0.1767, 'learning_rate': 3.66e-05, 'epoch': 0.94}


 32%|███▏      | 376/1170 [19:42<45:17,  3.42s/it]

{'loss': 0.173, 'learning_rate': 3.74e-05, 'epoch': 0.96}


 33%|███▎      | 384/1170 [20:10<44:40,  3.41s/it]

{'loss': 0.1533, 'learning_rate': 3.82e-05, 'epoch': 0.98}


                                                  
 33%|███▎      | 390/1170 [27:57<44:17,  3.41s/it] 

{'eval_loss': 0.18235643208026886, 'eval_accuracy': 0.92468, 'eval_f1': 0.9289246215981579, 'eval_precision': 0.8793682555563496, 'eval_recall': 0.9844, 'eval_runtime': 443.3143, 'eval_samples_per_second': 56.393, 'eval_steps_per_second': 7.049, 'epoch': 1.0}


 34%|███▎      | 392/1170 [29:08<23:54:52, 110.66s/it]

{'loss': 0.1391, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.0}


 34%|███▍      | 400/1170 [29:28<1:52:52,  8.80s/it]  

{'loss': 0.1424, 'learning_rate': 3.9800000000000005e-05, 'epoch': 1.02}


 35%|███▍      | 408/1170 [29:50<39:43,  3.13s/it]  

{'loss': 0.1742, 'learning_rate': 4.0600000000000004e-05, 'epoch': 1.04}


 36%|███▌      | 416/1170 [30:14<38:02,  3.03s/it]

{'loss': 0.1551, 'learning_rate': 4.14e-05, 'epoch': 1.06}


 36%|███▌      | 424/1170 [30:40<40:43,  3.28s/it]

{'loss': 0.0936, 'learning_rate': 4.22e-05, 'epoch': 1.09}


 37%|███▋      | 432/1170 [31:07<41:31,  3.38s/it]

{'loss': 0.1318, 'learning_rate': 4.3e-05, 'epoch': 1.11}


 38%|███▊      | 440/1170 [31:34<41:11,  3.39s/it]

{'loss': 0.1676, 'learning_rate': 4.38e-05, 'epoch': 1.13}


 38%|███▊      | 448/1170 [32:01<40:45,  3.39s/it]

{'loss': 0.1474, 'learning_rate': 4.46e-05, 'epoch': 1.15}


 39%|███▉      | 456/1170 [32:28<40:14,  3.38s/it]

{'loss': 0.156, 'learning_rate': 4.5400000000000006e-05, 'epoch': 1.17}


 40%|███▉      | 464/1170 [32:55<39:48,  3.38s/it]

{'loss': 0.1241, 'learning_rate': 4.6200000000000005e-05, 'epoch': 1.19}


 40%|████      | 472/1170 [33:22<39:28,  3.39s/it]

{'loss': 0.1454, 'learning_rate': 4.7e-05, 'epoch': 1.21}


 41%|████      | 480/1170 [33:50<39:11,  3.41s/it]

{'loss': 0.1744, 'learning_rate': 4.78e-05, 'epoch': 1.23}


 42%|████▏     | 488/1170 [34:17<38:46,  3.41s/it]

{'loss': 0.1683, 'learning_rate': 4.86e-05, 'epoch': 1.25}


 42%|████▏     | 496/1170 [34:44<38:14,  3.40s/it]

{'loss': 0.1316, 'learning_rate': 4.94e-05, 'epoch': 1.27}


 43%|████▎     | 504/1170 [35:11<37:50,  3.41s/it]

{'loss': 0.1289, 'learning_rate': 4.985074626865672e-05, 'epoch': 1.29}


 44%|████▍     | 512/1170 [35:39<37:24,  3.41s/it]

{'loss': 0.1872, 'learning_rate': 4.9253731343283586e-05, 'epoch': 1.31}


 44%|████▍     | 520/1170 [36:06<36:59,  3.41s/it]

{'loss': 0.1597, 'learning_rate': 4.8656716417910445e-05, 'epoch': 1.33}


 45%|████▌     | 528/1170 [36:33<36:29,  3.41s/it]

{'loss': 0.18, 'learning_rate': 4.805970149253732e-05, 'epoch': 1.35}


 46%|████▌     | 536/1170 [37:01<36:02,  3.41s/it]

{'loss': 0.1448, 'learning_rate': 4.7462686567164185e-05, 'epoch': 1.37}


 46%|████▋     | 544/1170 [37:28<35:36,  3.41s/it]

{'loss': 0.173, 'learning_rate': 4.6865671641791045e-05, 'epoch': 1.39}


 47%|████▋     | 552/1170 [37:55<35:10,  3.41s/it]

{'loss': 0.102, 'learning_rate': 4.626865671641791e-05, 'epoch': 1.41}


 48%|████▊     | 560/1170 [38:23<34:50,  3.43s/it]

{'loss': 0.1429, 'learning_rate': 4.567164179104478e-05, 'epoch': 1.43}


 49%|████▊     | 568/1170 [38:50<34:14,  3.41s/it]

{'loss': 0.1543, 'learning_rate': 4.5074626865671645e-05, 'epoch': 1.45}


 49%|████▉     | 576/1170 [39:17<33:45,  3.41s/it]

{'loss': 0.1965, 'learning_rate': 4.447761194029851e-05, 'epoch': 1.47}


 50%|████▉     | 584/1170 [39:45<33:18,  3.41s/it]

{'loss': 0.1327, 'learning_rate': 4.388059701492537e-05, 'epoch': 1.5}


 51%|█████     | 592/1170 [40:12<32:53,  3.41s/it]

{'loss': 0.2026, 'learning_rate': 4.328358208955224e-05, 'epoch': 1.52}


 51%|█████▏    | 600/1170 [40:39<32:25,  3.41s/it]

{'loss': 0.1637, 'learning_rate': 4.268656716417911e-05, 'epoch': 1.54}


 52%|█████▏    | 608/1170 [41:07<31:56,  3.41s/it]

{'loss': 0.1339, 'learning_rate': 4.208955223880597e-05, 'epoch': 1.56}


 53%|█████▎    | 616/1170 [41:34<31:36,  3.42s/it]

{'loss': 0.1703, 'learning_rate': 4.149253731343284e-05, 'epoch': 1.58}


 53%|█████▎    | 624/1170 [42:01<31:00,  3.41s/it]

{'loss': 0.0949, 'learning_rate': 4.0895522388059703e-05, 'epoch': 1.6}


 54%|█████▍    | 632/1170 [42:28<30:36,  3.41s/it]

{'loss': 0.1052, 'learning_rate': 4.029850746268657e-05, 'epoch': 1.62}


 55%|█████▍    | 640/1170 [42:56<30:09,  3.41s/it]

{'loss': 0.133, 'learning_rate': 3.9701492537313437e-05, 'epoch': 1.64}


 55%|█████▌    | 648/1170 [43:23<29:40,  3.41s/it]

{'loss': 0.1903, 'learning_rate': 3.9104477611940296e-05, 'epoch': 1.66}


 56%|█████▌    | 656/1170 [43:50<29:12,  3.41s/it]

{'loss': 0.2167, 'learning_rate': 3.850746268656716e-05, 'epoch': 1.68}


 57%|█████▋    | 664/1170 [44:18<28:48,  3.42s/it]

{'loss': 0.1532, 'learning_rate': 3.791044776119403e-05, 'epoch': 1.7}


 57%|█████▋    | 672/1170 [44:45<28:19,  3.41s/it]

{'loss': 0.175, 'learning_rate': 3.73134328358209e-05, 'epoch': 1.72}


 58%|█████▊    | 680/1170 [45:12<27:49,  3.41s/it]

{'loss': 0.1814, 'learning_rate': 3.671641791044776e-05, 'epoch': 1.74}


 59%|█████▉    | 688/1170 [45:40<27:25,  3.41s/it]

{'loss': 0.134, 'learning_rate': 3.611940298507463e-05, 'epoch': 1.76}


 59%|█████▉    | 696/1170 [46:07<26:57,  3.41s/it]

{'loss': 0.1496, 'learning_rate': 3.5522388059701495e-05, 'epoch': 1.78}


 60%|██████    | 704/1170 [46:34<26:30,  3.41s/it]

{'loss': 0.1498, 'learning_rate': 3.492537313432836e-05, 'epoch': 1.8}


 61%|██████    | 712/1170 [47:02<26:00,  3.41s/it]

{'loss': 0.1092, 'learning_rate': 3.432835820895522e-05, 'epoch': 1.82}


 62%|██████▏   | 720/1170 [47:29<25:33,  3.41s/it]

{'loss': 0.1683, 'learning_rate': 3.373134328358209e-05, 'epoch': 1.84}


 62%|██████▏   | 728/1170 [47:56<25:04,  3.40s/it]

{'loss': 0.1747, 'learning_rate': 3.3134328358208955e-05, 'epoch': 1.86}


 63%|██████▎   | 736/1170 [48:23<24:39,  3.41s/it]

{'loss': 0.125, 'learning_rate': 3.253731343283582e-05, 'epoch': 1.88}


 64%|██████▎   | 744/1170 [48:51<24:12,  3.41s/it]

{'loss': 0.1357, 'learning_rate': 3.194029850746269e-05, 'epoch': 1.9}


 64%|██████▍   | 752/1170 [49:18<23:44,  3.41s/it]

{'loss': 0.1289, 'learning_rate': 3.1343283582089554e-05, 'epoch': 1.93}


 65%|██████▍   | 760/1170 [49:45<23:15,  3.40s/it]

{'loss': 0.1446, 'learning_rate': 3.074626865671642e-05, 'epoch': 1.95}


 66%|██████▌   | 768/1170 [50:12<22:48,  3.41s/it]

{'loss': 0.1171, 'learning_rate': 3.0149253731343284e-05, 'epoch': 1.97}


 66%|██████▋   | 776/1170 [50:40<22:21,  3.41s/it]

{'loss': 0.1291, 'learning_rate': 2.955223880597015e-05, 'epoch': 1.99}


                                                  
 67%|██████▋   | 781/1170 [58:22<22:05,  3.41s/it] 

{'eval_loss': 0.13033834099769592, 'eval_accuracy': 0.9526, 'eval_f1': 0.9528320662341282, 'eval_precision': 0.9481898122474848, 'eval_recall': 0.95752, 'eval_runtime': 442.8456, 'eval_samples_per_second': 56.453, 'eval_steps_per_second': 7.057, 'epoch': 2.0}


 67%|██████▋   | 784/1170 [59:37<8:23:20, 78.24s/it]  

{'loss': 0.1376, 'learning_rate': 2.8955223880597017e-05, 'epoch': 2.01}


 68%|██████▊   | 792/1170 [59:58<43:50,  6.96s/it]  

{'loss': 0.0712, 'learning_rate': 2.835820895522388e-05, 'epoch': 2.03}


 68%|██████▊   | 800/1170 [1:00:20<18:48,  3.05s/it]

{'loss': 0.0797, 'learning_rate': 2.7761194029850747e-05, 'epoch': 2.05}


 69%|██████▉   | 808/1170 [1:00:44<17:59,  2.98s/it]

{'loss': 0.0712, 'learning_rate': 2.716417910447761e-05, 'epoch': 2.07}


 70%|██████▉   | 816/1170 [1:01:09<18:50,  3.19s/it]

{'loss': 0.08, 'learning_rate': 2.656716417910448e-05, 'epoch': 2.09}


 70%|███████   | 824/1170 [1:01:35<19:01,  3.30s/it]

{'loss': 0.0634, 'learning_rate': 2.5970149253731346e-05, 'epoch': 2.11}


 71%|███████   | 832/1170 [1:02:02<18:48,  3.34s/it]

{'loss': 0.1242, 'learning_rate': 2.537313432835821e-05, 'epoch': 2.13}


 72%|███████▏  | 840/1170 [1:02:29<18:25,  3.35s/it]

{'loss': 0.0476, 'learning_rate': 2.4776119402985076e-05, 'epoch': 2.15}


 72%|███████▏  | 848/1170 [1:02:56<18:07,  3.38s/it]

{'loss': 0.0718, 'learning_rate': 2.4179104477611943e-05, 'epoch': 2.17}


 73%|███████▎  | 856/1170 [1:03:23<17:43,  3.39s/it]

{'loss': 0.0962, 'learning_rate': 2.3582089552238806e-05, 'epoch': 2.19}


 74%|███████▍  | 864/1170 [1:03:50<17:22,  3.41s/it]

{'loss': 0.1246, 'learning_rate': 2.2985074626865672e-05, 'epoch': 2.21}


 75%|███████▍  | 872/1170 [1:04:17<16:56,  3.41s/it]

{'loss': 0.1039, 'learning_rate': 2.238805970149254e-05, 'epoch': 2.23}


 75%|███████▌  | 880/1170 [1:04:45<16:28,  3.41s/it]

{'loss': 0.0585, 'learning_rate': 2.1791044776119405e-05, 'epoch': 2.25}


 76%|███████▌  | 888/1170 [1:05:12<16:00,  3.41s/it]

{'loss': 0.0685, 'learning_rate': 2.119402985074627e-05, 'epoch': 2.27}


 77%|███████▋  | 896/1170 [1:05:39<15:33,  3.41s/it]

{'loss': 0.093, 'learning_rate': 2.0597014925373135e-05, 'epoch': 2.29}


 77%|███████▋  | 904/1170 [1:06:07<15:08,  3.41s/it]

{'loss': 0.1149, 'learning_rate': 2e-05, 'epoch': 2.31}


 78%|███████▊  | 912/1170 [1:06:34<14:40,  3.41s/it]

{'loss': 0.0753, 'learning_rate': 1.9402985074626868e-05, 'epoch': 2.33}


 79%|███████▊  | 920/1170 [1:07:01<14:11,  3.41s/it]

{'loss': 0.0759, 'learning_rate': 1.880597014925373e-05, 'epoch': 2.36}


 79%|███████▉  | 928/1170 [1:07:28<13:45,  3.41s/it]

{'loss': 0.0814, 'learning_rate': 1.8208955223880598e-05, 'epoch': 2.38}


 80%|████████  | 936/1170 [1:07:56<13:16,  3.40s/it]

{'loss': 0.0745, 'learning_rate': 1.761194029850746e-05, 'epoch': 2.4}


 81%|████████  | 944/1170 [1:08:23<12:52,  3.42s/it]

{'loss': 0.0974, 'learning_rate': 1.701492537313433e-05, 'epoch': 2.42}


 81%|████████▏ | 952/1170 [1:08:50<12:23,  3.41s/it]

{'loss': 0.0722, 'learning_rate': 1.6417910447761194e-05, 'epoch': 2.44}


 82%|████████▏ | 960/1170 [1:09:18<11:55,  3.41s/it]

{'loss': 0.0852, 'learning_rate': 1.582089552238806e-05, 'epoch': 2.46}


 83%|████████▎ | 968/1170 [1:09:45<11:29,  3.42s/it]

{'loss': 0.0585, 'learning_rate': 1.5223880597014925e-05, 'epoch': 2.48}


 83%|████████▎ | 976/1170 [1:10:12<11:01,  3.41s/it]

{'loss': 0.1258, 'learning_rate': 1.4626865671641794e-05, 'epoch': 2.5}


 84%|████████▍ | 984/1170 [1:10:40<10:36,  3.42s/it]

{'loss': 0.0754, 'learning_rate': 1.4029850746268658e-05, 'epoch': 2.52}


 85%|████████▍ | 992/1170 [1:11:07<10:06,  3.41s/it]

{'loss': 0.0674, 'learning_rate': 1.3432835820895523e-05, 'epoch': 2.54}


 85%|████████▌ | 1000/1170 [1:11:34<09:38,  3.40s/it]

{'loss': 0.0468, 'learning_rate': 1.2835820895522388e-05, 'epoch': 2.56}


 86%|████████▌ | 1008/1170 [1:12:01<09:11,  3.40s/it]

{'loss': 0.0638, 'learning_rate': 1.2238805970149255e-05, 'epoch': 2.58}


 87%|████████▋ | 1016/1170 [1:12:29<08:44,  3.41s/it]

{'loss': 0.1078, 'learning_rate': 1.164179104477612e-05, 'epoch': 2.6}


 88%|████████▊ | 1024/1170 [1:12:56<08:17,  3.41s/it]

{'loss': 0.087, 'learning_rate': 1.1044776119402986e-05, 'epoch': 2.62}


 88%|████████▊ | 1032/1170 [1:13:23<07:50,  3.41s/it]

{'loss': 0.0729, 'learning_rate': 1.0447761194029851e-05, 'epoch': 2.64}


 89%|████████▉ | 1040/1170 [1:13:50<07:23,  3.41s/it]

{'loss': 0.0381, 'learning_rate': 9.850746268656717e-06, 'epoch': 2.66}


 90%|████████▉ | 1048/1170 [1:14:18<06:55,  3.41s/it]

{'loss': 0.0975, 'learning_rate': 9.253731343283582e-06, 'epoch': 2.68}


 90%|█████████ | 1056/1170 [1:14:45<06:28,  3.41s/it]

{'loss': 0.0471, 'learning_rate': 8.656716417910449e-06, 'epoch': 2.7}


 91%|█████████ | 1064/1170 [1:15:12<06:01,  3.41s/it]

{'loss': 0.0404, 'learning_rate': 8.059701492537314e-06, 'epoch': 2.72}


 92%|█████████▏| 1072/1170 [1:15:40<05:34,  3.41s/it]

{'loss': 0.0439, 'learning_rate': 7.4626865671641785e-06, 'epoch': 2.74}


 92%|█████████▏| 1080/1170 [1:16:07<05:06,  3.41s/it]

{'loss': 0.0427, 'learning_rate': 6.865671641791045e-06, 'epoch': 2.76}


 93%|█████████▎| 1088/1170 [1:16:34<04:39,  3.41s/it]

{'loss': 0.0452, 'learning_rate': 6.26865671641791e-06, 'epoch': 2.79}


 94%|█████████▎| 1096/1170 [1:17:01<04:12,  3.41s/it]

{'loss': 0.1153, 'learning_rate': 5.671641791044776e-06, 'epoch': 2.81}


 94%|█████████▍| 1104/1170 [1:17:29<03:44,  3.41s/it]

{'loss': 0.0825, 'learning_rate': 5.074626865671642e-06, 'epoch': 2.83}


 95%|█████████▌| 1112/1170 [1:17:56<03:18,  3.42s/it]

{'loss': 0.0543, 'learning_rate': 4.477611940298508e-06, 'epoch': 2.85}


 96%|█████████▌| 1120/1170 [1:18:23<02:50,  3.41s/it]

{'loss': 0.0547, 'learning_rate': 3.8805970149253735e-06, 'epoch': 2.87}


 96%|█████████▋| 1128/1170 [1:18:51<02:23,  3.43s/it]

{'loss': 0.0624, 'learning_rate': 3.2835820895522387e-06, 'epoch': 2.89}


 97%|█████████▋| 1136/1170 [1:19:18<01:55,  3.41s/it]

{'loss': 0.0509, 'learning_rate': 2.6865671641791044e-06, 'epoch': 2.91}


 98%|█████████▊| 1144/1170 [1:19:45<01:28,  3.40s/it]

{'loss': 0.038, 'learning_rate': 2.0895522388059705e-06, 'epoch': 2.93}


 98%|█████████▊| 1152/1170 [1:20:12<01:01,  3.40s/it]

{'loss': 0.0301, 'learning_rate': 1.4925373134328358e-06, 'epoch': 2.95}


 99%|█████████▉| 1160/1170 [1:20:40<00:34,  3.41s/it]

{'loss': 0.0621, 'learning_rate': 8.955223880597016e-07, 'epoch': 2.97}


100%|█████████▉| 1168/1170 [1:21:07<00:06,  3.41s/it]

{'loss': 0.0686, 'learning_rate': 2.9850746268656716e-07, 'epoch': 2.99}


                                                     
100%|██████████| 1170/1170 [1:28:39<00:00,  3.41s/it]

{'eval_loss': 0.16553638875484467, 'eval_accuracy': 0.95624, 'eval_f1': 0.9566629694184757, 'eval_precision': 0.9475047080979284, 'eval_recall': 0.966, 'eval_runtime': 443.0918, 'eval_samples_per_second': 56.422, 'eval_steps_per_second': 7.053, 'epoch': 3.0}


100%|██████████| 1170/1170 [1:29:39<00:00,  4.60s/it]

{'train_runtime': 5379.4943, 'train_samples_per_second': 13.942, 'train_steps_per_second': 0.217, 'train_loss': 0.17678481300926616, 'epoch': 3.0}





TrainOutput(global_step=1170, training_loss=0.17678481300926616, metrics={'train_runtime': 5379.4943, 'train_samples_per_second': 13.942, 'train_steps_per_second': 0.217, 'train_loss': 0.17678481300926616, 'epoch': 3.0})

In [18]:
trainer.evaluate()


100%|██████████| 3125/3125 [06:14<00:00,  8.34it/s]


{'eval_loss': 0.13033834099769592,
 'eval_accuracy': 0.9526,
 'eval_f1': 0.9528320662341282,
 'eval_precision': 0.9481898122474848,
 'eval_recall': 0.95752,
 'eval_runtime': 382.5698,
 'eval_samples_per_second': 65.348,
 'eval_steps_per_second': 8.168,
 'epoch': 3.0}