In [1]:
import pandas as pd

# Name of the sub-directory that identifies this fine-tuning run.
finetuned_dirname = "40-epoch-bert-base-finetuned-twitter15-tf"

# Rows are split on "\n" only, so any "\r" from CRLF line endings stays glued
# to the text (the first training sample displayed further down ends in
# "\r"). Strip that residue from the tweet text right after loading.
# `.assign` builds a new frame, so no chained-assignment warning is triggered.
data = pd.read_csv("../../data/processed/twitter15-tf_dataset.csv", lineterminator="\n")
data = data[['tweet_text', 'tvt2', 'label']].assign(
    tweet_text=lambda frame: frame['tweet_text'].str.rstrip("\r")
)
print(data.shape)
data.head()

(742, 3)


Unnamed: 0,tweet_text,tvt2,label
0,rip elly may clampett: so sad to learn #beverl...,training,True
1,just in: missing afghan soldiers found trying ...,training,True
2,#riphulkhogan my heart is ripping like your sh...,training,False
3,a chick-fil-a manager allegedly banned this hi...,validation,False
4,islamic tribunal using sharia law in texas has...,validation,False


In [2]:
# Alias, not a copy: `combined_data` refers to the same DataFrame object as `data`.
combined_data = data

In [3]:
import torch

class CustomTextDataset(torch.utils.data.dataset.Dataset):
    """Dataset of (text, label) pairs plus optional tokenizer outputs.

    ``texts`` and ``labels`` are parallel, index-aligned sequences. The three
    tokenizer fields start as ``None`` and are filled by :meth:`tokenize`.
    """

    # Tokenizer output keys stored per sample, in the order they are read.
    _TOKEN_FIELDS = ("attention_mask", "input_ids", "token_type_ids")

    def __init__(self, texts, labels):
        """Store the parallel ``texts`` / ``labels`` sequences as given."""
        self.texts = texts
        self.labels = labels
        # None means "tokenize() has not been called yet".
        self.attention_mask = None
        self.input_ids = None
        self.token_type_ids = None

    def __len__(self):
        """Number of samples, taken from ``labels``."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return one sample as a dict.

        Keys are ``text``, ``label`` and the three tokenizer fields; a
        tokenizer field is ``None`` while its backing list is unset or empty.
        """
        sample = {"text": self.texts[idx], "label": self.labels[idx]}
        for field in self._TOKEN_FIELDS:
            stored = getattr(self, field)
            sample[field] = stored[idx] if stored else None
        return sample

    def tokenize(self, tokenizer):
        """Tokenize every text, one call per text, and store the results.

        ``tokenizer`` is called as
        ``tokenizer(text, padding="max_length", truncation=True)`` and must
        return a mapping with the keys ``attention_mask``, ``input_ids`` and
        ``token_type_ids``. Any previously stored results are discarded.
        """
        # Reset first so a repeated call never appends to old results.
        self.attention_mask, self.input_ids, self.token_type_ids = [], [], []

        for text in self.texts:
            encoded = tokenizer(text, padding="max_length", truncation=True)
            for field in self._TOKEN_FIELDS:
                getattr(self, field).append(encoded[field])

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Integer-encode the label column. Ids follow the order in which each distinct
# value first appears, so `labels_str[label_id]` recovers the original value.
# Note the mapping therefore depends on row order.
labels_str = combined_data['label'].unique().tolist()

# One dict lookup per row instead of `iterrows()` + `list.index()`: same ids,
# without building a Series per row or rescanning the value list each time.
label_to_id = {value: label_id for label_id, value in enumerate(labels_str)}
labels = [label_to_id[value] for value in combined_data['label']]

print(len(labels))
labels[:10]

742


[0, 0, 1, 1, 1, 1, 1, 1, 0, 1]

In [5]:
def build_split_dataset(split_name):
    """Build a CustomTextDataset from the rows whose `tvt2` equals `split_name`.

    `labels` is a plain list aligned with the *position* of each row in
    `combined_data`, so rows are selected with a positional boolean mask
    rather than by DataFrame index label. Indexing the list with index labels
    is only correct while the frame has a default 0..n-1 index.
    """
    in_split = (combined_data['tvt2'] == split_name).tolist()
    texts = [text for text, keep in zip(combined_data['tweet_text'], in_split) if keep]
    split_labels = [label for label, keep in zip(labels, in_split) if keep]
    return CustomTextDataset(texts, split_labels)


train_dataset = build_split_dataset('training')
# NOTE: despite its name, `test_dataset` holds the 'validation' split.
test_dataset = build_split_dataset('validation')
train_dataset[0]

{'text': 'rip elly may clampett: so sad to learn #beverlyhillbillies star donna douglas has passed away. URL\r',
 'label': 0,
 'attention_mask': None,
 'input_ids': None,
 'token_type_ids': None}

In [6]:
from transformers import AutoTokenizer

# Load the tokenizer that belongs to the "bert-base-cased" checkpoint.
# NOTE(review): the sample tweets displayed earlier appear to be lowercased;
# a cased checkpoint may not be the best match for such text — confirm.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

In [7]:
# inputs = tokenizer(["you're stuck in a timewrap from 2004 though", "summa lumma dumma lumma"], padding="max_length", truncation=True)
# for k,v in inputs.items():
#     print(k)

In [8]:
def tokenize_function(examples):
    """Run the notebook-level `tokenizer` on `examples["text"]`.

    The call passes `padding="max_length"` and `truncation=True` and returns
    whatever the tokenizer returns. This helper is not called anywhere in the
    cells shown in this notebook.
    """
    options = {"padding": "max_length", "truncation": True}
    return tokenizer(examples["text"], **options)

# Tokenize both splits in place: each call fills that dataset's
# attention_mask / input_ids / token_type_ids lists, one entry per text.
train_dataset.tokenize(tokenizer)
test_dataset.tokenize(tokenizer)

In [9]:
# Sample counts: training split first, then the validation split
# (held in `test_dataset`), one per line.
print(len(train_dataset), len(test_dataset), sep="\n")

497
152


### Fine Tuning

In [10]:
from transformers import AutoModelForSequenceClassification

# Load the "bert-base-cased" checkpoint with a sequence-classification head
# configured for two labels; hidden states are not returned in model outputs.
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-cased",
    num_labels=2,
    output_hidden_states=False,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
import math

from transformers import TrainingArguments

epochs = 40
batch_size = 8

# Batches per epoch is a ceiling division (the last, partial batch still
# counts). `round(x + 0.5)` is NOT a ceiling: Python rounds halves to the
# nearest even integer, so an exact quotient such as 504 / 8 = 63 would become
# round(63.5) == 64. `math.ceil` is correct for every dataset size.
# Assumes one optimizer step per batch (single device, no gradient
# accumulation), which matches the 2520 total steps of the run logged below.
steps_per_epoch = math.ceil(len(train_dataset) / batch_size)

# Checkpoint interval equal to the total step count of the run.
save_steps = steps_per_epoch * epochs
# save_steps = 1_000_000

training_args = TrainingArguments(
    output_dir=f"../../data/models/{finetuned_dirname}",
    num_train_epochs=epochs,
    save_steps=save_steps,
    # NOTE(review): with logging_strategy="epoch" this value looks unused —
    # confirm against the TrainingArguments documentation.
    logging_steps=300,
    learning_rate=1e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    logging_strategy="epoch"
)

print(f"Save Steps : {save_steps}")

Save Steps : 2520


In [12]:
import numpy as np
from datasets import load_metric

# Load the "accuracy" metric object through `datasets.load_metric`.
# NOTE(review): the cell output echoes this line, which looks like a warning
# raised by `load_metric` (presumably its deprecation notice) — confirm
# against the installed `datasets` version.
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Accuracy is computed directly with NumPy, so this function does not depend
    on a metric object loaded through `datasets.load_metric`.

    Args:
        eval_pred: a `(logits, labels)` pair. `logits` is array-like with the
            class scores on the last axis; `labels` is array-like holding the
            reference class ids, one per prediction.

    Returns:
        dict: `{"accuracy": <float>}`, the fraction of predictions equal to
        their reference label.

    Raises:
        ValueError: if predictions and labels do not have the same shape.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    references = np.asarray(labels)
    # Guard against silent broadcasting in the comparison below.
    if predictions.shape != references.shape:
        raise ValueError(
            f"Shape mismatch: predictions {predictions.shape} vs labels {references.shape}"
        )
    return {"accuracy": float(np.mean(predictions == references))}

  metric = load_metric("accuracy")


In [13]:
from transformers import Trainer

# Wire the model, training arguments, datasets and metric callback together.
# `eval_dataset` receives `test_dataset`, which was built from the rows whose
# `tvt2` value is 'validation'.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

In [14]:
import time

# Use perf_counter() for the duration: it is a monotonic clock intended for
# measuring elapsed time, whereas time.time() can jump when the system clock
# is adjusted during a long training run.
start = time.perf_counter()

trainer.train()

print(f"Execution Time : {round(time.perf_counter() - start)} seconds")

  2%|▎         | 63/2520 [00:19<12:13,  3.35it/s] 

{'loss': 0.6719, 'learning_rate': 9.75e-06, 'epoch': 1.0}


                                                 
  2%|▎         | 63/2520 [00:21<12:13,  3.35it/s]

{'eval_loss': 0.6210176348686218, 'eval_accuracy': 0.6907894736842105, 'eval_runtime': 1.7674, 'eval_samples_per_second': 86.001, 'eval_steps_per_second': 10.75, 'epoch': 1.0}


  5%|▌         | 126/2520 [00:40<11:56,  3.34it/s]

{'loss': 0.4667, 'learning_rate': 9.5e-06, 'epoch': 2.0}


                                                  
  5%|▌         | 126/2520 [00:42<11:56,  3.34it/s]

{'eval_loss': 0.3940335810184479, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 1.7679, 'eval_samples_per_second': 85.976, 'eval_steps_per_second': 10.747, 'epoch': 2.0}


  8%|▊         | 189/2520 [01:00<11:39,  3.33it/s]

{'loss': 0.2052, 'learning_rate': 9.250000000000001e-06, 'epoch': 3.0}


                                                  
  8%|▊         | 189/2520 [01:02<11:39,  3.33it/s]

{'eval_loss': 0.34734484553337097, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 1.7716, 'eval_samples_per_second': 85.798, 'eval_steps_per_second': 10.725, 'epoch': 3.0}


 10%|█         | 252/2520 [01:21<11:20,  3.33it/s]

{'loss': 0.0872, 'learning_rate': 9e-06, 'epoch': 4.0}


                                                  
 10%|█         | 252/2520 [01:22<11:20,  3.33it/s]

{'eval_loss': 0.3985846936702728, 'eval_accuracy': 0.875, 'eval_runtime': 1.7747, 'eval_samples_per_second': 85.649, 'eval_steps_per_second': 10.706, 'epoch': 4.0}


 12%|█▎        | 315/2520 [01:41<11:01,  3.33it/s]

{'loss': 0.0516, 'learning_rate': 8.750000000000001e-06, 'epoch': 5.0}


                                                  
 12%|█▎        | 315/2520 [01:43<11:01,  3.33it/s]

{'eval_loss': 0.44651544094085693, 'eval_accuracy': 0.875, 'eval_runtime': 1.7786, 'eval_samples_per_second': 85.461, 'eval_steps_per_second': 10.683, 'epoch': 5.0}


 15%|█▌        | 378/2520 [02:02<10:44,  3.32it/s]

{'loss': 0.0262, 'learning_rate': 8.5e-06, 'epoch': 6.0}


                                                  
 15%|█▌        | 378/2520 [02:04<10:44,  3.32it/s]

{'eval_loss': 0.567107617855072, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 1.7817, 'eval_samples_per_second': 85.312, 'eval_steps_per_second': 10.664, 'epoch': 6.0}


 18%|█▊        | 441/2520 [02:22<10:22,  3.34it/s]

{'loss': 0.0123, 'learning_rate': 8.25e-06, 'epoch': 7.0}


                                                  
 18%|█▊        | 441/2520 [02:24<10:22,  3.34it/s]

{'eval_loss': 0.5684689879417419, 'eval_accuracy': 0.875, 'eval_runtime': 1.7886, 'eval_samples_per_second': 84.983, 'eval_steps_per_second': 10.623, 'epoch': 7.0}


 20%|██        | 504/2520 [02:43<10:06,  3.33it/s]

{'loss': 0.0084, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.0}


                                                  
 20%|██        | 504/2520 [02:45<10:06,  3.33it/s]

{'eval_loss': 0.5581983327865601, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 1.7841, 'eval_samples_per_second': 85.196, 'eval_steps_per_second': 10.649, 'epoch': 8.0}


 22%|██▎       | 567/2520 [03:03<09:46,  3.33it/s]

{'loss': 0.0007, 'learning_rate': 7.75e-06, 'epoch': 9.0}


                                                  
 22%|██▎       | 567/2520 [03:05<09:46,  3.33it/s]

{'eval_loss': 0.6002413034439087, 'eval_accuracy': 0.8881578947368421, 'eval_runtime': 1.7835, 'eval_samples_per_second': 85.227, 'eval_steps_per_second': 10.653, 'epoch': 9.0}


 25%|██▌       | 630/2520 [03:24<09:35,  3.29it/s]

{'loss': 0.0005, 'learning_rate': 7.500000000000001e-06, 'epoch': 10.0}


                                                  
 25%|██▌       | 630/2520 [03:26<09:35,  3.29it/s]

{'eval_loss': 0.625002920627594, 'eval_accuracy': 0.8881578947368421, 'eval_runtime': 1.7882, 'eval_samples_per_second': 85.001, 'eval_steps_per_second': 10.625, 'epoch': 10.0}


 28%|██▊       | 693/2520 [03:44<09:07,  3.34it/s]

{'loss': 0.0004, 'learning_rate': 7.25e-06, 'epoch': 11.0}


                                                  
 28%|██▊       | 693/2520 [03:46<09:07,  3.34it/s]

{'eval_loss': 0.648063063621521, 'eval_accuracy': 0.8881578947368421, 'eval_runtime': 1.7878, 'eval_samples_per_second': 85.022, 'eval_steps_per_second': 10.628, 'epoch': 11.0}


 30%|███       | 756/2520 [04:05<08:56,  3.29it/s]

{'loss': 0.0004, 'learning_rate': 7e-06, 'epoch': 12.0}


                                                  
 30%|███       | 756/2520 [04:07<08:56,  3.29it/s]

{'eval_loss': 0.6738107800483704, 'eval_accuracy': 0.8881578947368421, 'eval_runtime': 1.789, 'eval_samples_per_second': 84.963, 'eval_steps_per_second': 10.62, 'epoch': 12.0}


 32%|███▎      | 819/2520 [04:26<08:30,  3.33it/s]

{'loss': 0.0004, 'learning_rate': 6.750000000000001e-06, 'epoch': 13.0}


                                                  
 32%|███▎      | 819/2520 [04:28<08:30,  3.33it/s]

{'eval_loss': 0.7117390632629395, 'eval_accuracy': 0.875, 'eval_runtime': 1.7936, 'eval_samples_per_second': 84.746, 'eval_steps_per_second': 10.593, 'epoch': 13.0}


 35%|███▌      | 882/2520 [04:46<08:12,  3.33it/s]

{'loss': 0.0003, 'learning_rate': 6.5000000000000004e-06, 'epoch': 14.0}


                                                  
 35%|███▌      | 882/2520 [04:48<08:12,  3.33it/s]

{'eval_loss': 0.8164372444152832, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 1.7896, 'eval_samples_per_second': 84.937, 'eval_steps_per_second': 10.617, 'epoch': 14.0}


 38%|███▊      | 945/2520 [05:07<07:53,  3.33it/s]

{'loss': 0.0003, 'learning_rate': 6.25e-06, 'epoch': 15.0}


                                                  
 38%|███▊      | 945/2520 [05:09<07:53,  3.33it/s]

{'eval_loss': 0.8072597980499268, 'eval_accuracy': 0.875, 'eval_runtime': 1.7908, 'eval_samples_per_second': 84.876, 'eval_steps_per_second': 10.61, 'epoch': 15.0}


 40%|████      | 1008/2520 [05:27<07:37,  3.30it/s]

{'loss': 0.0002, 'learning_rate': 6e-06, 'epoch': 16.0}


                                                   
 40%|████      | 1008/2520 [05:29<07:37,  3.30it/s]

{'eval_loss': 0.822089672088623, 'eval_accuracy': 0.875, 'eval_runtime': 1.7873, 'eval_samples_per_second': 85.045, 'eval_steps_per_second': 10.631, 'epoch': 16.0}


 42%|████▎     | 1071/2520 [05:48<07:17,  3.31it/s]

{'loss': 0.0002, 'learning_rate': 5.75e-06, 'epoch': 17.0}


                                                   
 42%|████▎     | 1071/2520 [05:50<07:17,  3.31it/s]

{'eval_loss': 0.8362907767295837, 'eval_accuracy': 0.875, 'eval_runtime': 1.7835, 'eval_samples_per_second': 85.227, 'eval_steps_per_second': 10.653, 'epoch': 17.0}


 45%|████▌     | 1134/2520 [06:09<06:56,  3.33it/s]

{'loss': 0.0002, 'learning_rate': 5.500000000000001e-06, 'epoch': 18.0}


                                                   
 45%|████▌     | 1134/2520 [06:10<06:56,  3.33it/s]

{'eval_loss': 0.8453768491744995, 'eval_accuracy': 0.875, 'eval_runtime': 1.7921, 'eval_samples_per_second': 84.818, 'eval_steps_per_second': 10.602, 'epoch': 18.0}


 48%|████▊     | 1197/2520 [06:29<06:37,  3.33it/s]

{'loss': 0.0002, 'learning_rate': 5.2500000000000006e-06, 'epoch': 19.0}


                                                   
 48%|████▊     | 1197/2520 [06:31<06:37,  3.33it/s]

{'eval_loss': 0.8549497723579407, 'eval_accuracy': 0.875, 'eval_runtime': 1.7902, 'eval_samples_per_second': 84.907, 'eval_steps_per_second': 10.613, 'epoch': 19.0}


 50%|█████     | 1260/2520 [06:50<06:18,  3.33it/s]

{'loss': 0.0002, 'learning_rate': 5e-06, 'epoch': 20.0}


                                                   
 50%|█████     | 1260/2520 [06:51<06:18,  3.33it/s]

{'eval_loss': 0.8633268475532532, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 1.79, 'eval_samples_per_second': 84.918, 'eval_steps_per_second': 10.615, 'epoch': 20.0}


 52%|█████▎    | 1323/2520 [07:10<06:01,  3.31it/s]

{'loss': 0.0002, 'learning_rate': 4.75e-06, 'epoch': 21.0}


                                                   
 52%|█████▎    | 1323/2520 [07:12<06:01,  3.31it/s]

{'eval_loss': 0.8796065449714661, 'eval_accuracy': 0.875, 'eval_runtime': 1.7901, 'eval_samples_per_second': 84.913, 'eval_steps_per_second': 10.614, 'epoch': 21.0}


 55%|█████▌    | 1386/2520 [07:31<05:43,  3.30it/s]

{'loss': 0.0001, 'learning_rate': 4.5e-06, 'epoch': 22.0}


                                                   
 55%|█████▌    | 1386/2520 [07:33<05:43,  3.30it/s]

{'eval_loss': 0.8876435160636902, 'eval_accuracy': 0.875, 'eval_runtime': 1.7885, 'eval_samples_per_second': 84.988, 'eval_steps_per_second': 10.623, 'epoch': 22.0}


 57%|█████▊    | 1449/2520 [07:51<05:21,  3.34it/s]

{'loss': 0.0001, 'learning_rate': 4.25e-06, 'epoch': 23.0}


                                                   
 57%|█████▊    | 1449/2520 [07:53<05:21,  3.34it/s]

{'eval_loss': 0.8908584117889404, 'eval_accuracy': 0.875, 'eval_runtime': 1.7907, 'eval_samples_per_second': 84.882, 'eval_steps_per_second': 10.61, 'epoch': 23.0}


 60%|██████    | 1512/2520 [08:12<05:02,  3.33it/s]

{'loss': 0.0001, 'learning_rate': 4.000000000000001e-06, 'epoch': 24.0}


                                                   
 60%|██████    | 1512/2520 [08:14<05:02,  3.33it/s]

{'eval_loss': 0.8966116905212402, 'eval_accuracy': 0.875, 'eval_runtime': 1.7924, 'eval_samples_per_second': 84.8, 'eval_steps_per_second': 10.6, 'epoch': 24.0}


 62%|██████▎   | 1575/2520 [08:33<04:44,  3.33it/s]

{'loss': 0.0001, 'learning_rate': 3.7500000000000005e-06, 'epoch': 25.0}


                                                   
 62%|██████▎   | 1575/2520 [08:34<04:44,  3.33it/s]

{'eval_loss': 0.90240877866745, 'eval_accuracy': 0.875, 'eval_runtime': 1.791, 'eval_samples_per_second': 84.87, 'eval_steps_per_second': 10.609, 'epoch': 25.0}


 65%|██████▌   | 1638/2520 [08:53<04:24,  3.33it/s]

{'loss': 0.0001, 'learning_rate': 3.5e-06, 'epoch': 26.0}


                                                   
 65%|██████▌   | 1638/2520 [08:55<04:24,  3.33it/s]

{'eval_loss': 0.9100918173789978, 'eval_accuracy': 0.875, 'eval_runtime': 1.7894, 'eval_samples_per_second': 84.946, 'eval_steps_per_second': 10.618, 'epoch': 26.0}


 68%|██████▊   | 1701/2520 [09:14<04:07,  3.30it/s]

{'loss': 0.0001, 'learning_rate': 3.2500000000000002e-06, 'epoch': 27.0}


                                                   
 68%|██████▊   | 1701/2520 [09:15<04:07,  3.30it/s]

{'eval_loss': 0.9143431782722473, 'eval_accuracy': 0.875, 'eval_runtime': 1.7915, 'eval_samples_per_second': 84.845, 'eval_steps_per_second': 10.606, 'epoch': 27.0}


 70%|███████   | 1764/2520 [09:34<03:49,  3.29it/s]

{'loss': 0.0001, 'learning_rate': 3e-06, 'epoch': 28.0}


                                                   
 70%|███████   | 1764/2520 [09:36<03:49,  3.29it/s]

{'eval_loss': 0.9188351035118103, 'eval_accuracy': 0.875, 'eval_runtime': 1.7892, 'eval_samples_per_second': 84.956, 'eval_steps_per_second': 10.619, 'epoch': 28.0}


 72%|███████▎  | 1827/2520 [09:55<03:31,  3.28it/s]

{'loss': 0.0001, 'learning_rate': 2.7500000000000004e-06, 'epoch': 29.0}


                                                   
 72%|███████▎  | 1827/2520 [09:57<03:31,  3.28it/s]

{'eval_loss': 0.9222235679626465, 'eval_accuracy': 0.875, 'eval_runtime': 1.7938, 'eval_samples_per_second': 84.739, 'eval_steps_per_second': 10.592, 'epoch': 29.0}


 75%|███████▌  | 1890/2520 [10:16<03:11,  3.30it/s]

{'loss': 0.0002, 'learning_rate': 2.5e-06, 'epoch': 30.0}


                                                   
 75%|███████▌  | 1890/2520 [10:18<03:11,  3.30it/s]

{'eval_loss': 0.930414080619812, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 1.7958, 'eval_samples_per_second': 84.64, 'eval_steps_per_second': 10.58, 'epoch': 30.0}


 78%|███████▊  | 1953/2520 [10:36<02:52,  3.28it/s]

{'loss': 0.0001, 'learning_rate': 2.25e-06, 'epoch': 31.0}


                                                   
 78%|███████▊  | 1953/2520 [10:38<02:52,  3.28it/s]

{'eval_loss': 0.9365752339363098, 'eval_accuracy': 0.875, 'eval_runtime': 1.792, 'eval_samples_per_second': 84.819, 'eval_steps_per_second': 10.602, 'epoch': 31.0}


 80%|████████  | 2016/2520 [10:57<02:31,  3.33it/s]

{'loss': 0.0001, 'learning_rate': 2.0000000000000003e-06, 'epoch': 32.0}


                                                   
 80%|████████  | 2016/2520 [10:59<02:31,  3.33it/s]

{'eval_loss': 0.939691424369812, 'eval_accuracy': 0.875, 'eval_runtime': 1.7931, 'eval_samples_per_second': 84.767, 'eval_steps_per_second': 10.596, 'epoch': 32.0}


 82%|████████▎ | 2079/2520 [11:18<02:13,  3.31it/s]

{'loss': 0.0001, 'learning_rate': 1.75e-06, 'epoch': 33.0}


                                                   
 82%|████████▎ | 2079/2520 [11:19<02:13,  3.31it/s]

{'eval_loss': 0.9435108304023743, 'eval_accuracy': 0.875, 'eval_runtime': 1.7939, 'eval_samples_per_second': 84.733, 'eval_steps_per_second': 10.592, 'epoch': 33.0}


 85%|████████▌ | 2142/2520 [11:38<01:53,  3.32it/s]

{'loss': 0.0001, 'learning_rate': 1.5e-06, 'epoch': 34.0}


                                                   
 85%|████████▌ | 2142/2520 [11:40<01:53,  3.32it/s]

{'eval_loss': 0.9466649293899536, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 1.7922, 'eval_samples_per_second': 84.812, 'eval_steps_per_second': 10.602, 'epoch': 34.0}


 88%|████████▊ | 2205/2520 [11:59<01:36,  3.28it/s]

{'loss': 0.0001, 'learning_rate': 1.25e-06, 'epoch': 35.0}


                                                   
 88%|████████▊ | 2205/2520 [12:01<01:36,  3.28it/s]

{'eval_loss': 0.9487285614013672, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 1.7938, 'eval_samples_per_second': 84.735, 'eval_steps_per_second': 10.592, 'epoch': 35.0}


 90%|█████████ | 2268/2520 [12:20<01:15,  3.32it/s]

{'loss': 0.0001, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


                                                   
 90%|█████████ | 2268/2520 [12:21<01:15,  3.32it/s]

{'eval_loss': 0.9502125978469849, 'eval_accuracy': 0.875, 'eval_runtime': 1.7963, 'eval_samples_per_second': 84.62, 'eval_steps_per_second': 10.578, 'epoch': 36.0}


 92%|█████████▎| 2331/2520 [12:40<00:56,  3.33it/s]

{'loss': 0.0001, 'learning_rate': 7.5e-07, 'epoch': 37.0}


                                                   
 92%|█████████▎| 2331/2520 [12:42<00:56,  3.33it/s]

{'eval_loss': 0.9519069790840149, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 1.7895, 'eval_samples_per_second': 84.942, 'eval_steps_per_second': 10.618, 'epoch': 37.0}


 95%|█████████▌| 2394/2520 [13:01<00:37,  3.33it/s]

{'loss': 0.0001, 'learning_rate': 5.000000000000001e-07, 'epoch': 38.0}


                                                   
 95%|█████████▌| 2394/2520 [13:03<00:37,  3.33it/s]

{'eval_loss': 0.9521592855453491, 'eval_accuracy': 0.875, 'eval_runtime': 1.7914, 'eval_samples_per_second': 84.85, 'eval_steps_per_second': 10.606, 'epoch': 38.0}


 98%|█████████▊| 2457/2520 [13:21<00:19,  3.30it/s]

{'loss': 0.0001, 'learning_rate': 2.5000000000000004e-07, 'epoch': 39.0}


                                                   
 98%|█████████▊| 2457/2520 [13:23<00:19,  3.30it/s]

{'eval_loss': 0.9527353048324585, 'eval_accuracy': 0.875, 'eval_runtime': 1.7924, 'eval_samples_per_second': 84.803, 'eval_steps_per_second': 10.6, 'epoch': 39.0}


100%|██████████| 2520/2520 [13:43<00:00,  3.33it/s]

{'loss': 0.0001, 'learning_rate': 0.0, 'epoch': 40.0}


                                                   
100%|██████████| 2520/2520 [13:45<00:00,  3.05it/s]

{'eval_loss': 0.9529910683631897, 'eval_accuracy': 0.875, 'eval_runtime': 1.789, 'eval_samples_per_second': 84.961, 'eval_steps_per_second': 10.62, 'epoch': 40.0}
{'train_runtime': 825.3855, 'train_samples_per_second': 24.086, 'train_steps_per_second': 3.053, 'train_loss': 0.03839224751240441, 'epoch': 40.0}
Execution Time : 825 seconds





In [15]:
# Run one more evaluation pass on the Trainer's eval dataset (the 'validation'
# rows held in `test_dataset`) and display the resulting metrics dict.
trainer.evaluate()

100%|██████████| 19/19 [00:01<00:00, 11.28it/s]


{'eval_loss': 0.9529910683631897,
 'eval_accuracy': 0.875,
 'eval_runtime': 1.7794,
 'eval_samples_per_second': 85.422,
 'eval_steps_per_second': 10.678,
 'epoch': 40.0}