In [1]:
import pandas as pd

# Run configuration: the pretrained checkpoint to start from and the
# directory name (under ../../data/models/) that training output is written to.
transformer_name = "roberta-base"
finetuned_dirname = "40-epoch-roberta-finetuned-twitter15-tf"

# Read the processed dataset and keep only the text, the split tag and the label.
# NOTE(review): a sample printed in a later cell ends with "\r"; presumably the
# CSV carries carriage returns inside the text field - confirm whether the
# text should be stripped before tokenization.
data = pd.read_csv(
    "../../data/processed/twitter15-tf_dataset.csv",
    lineterminator="\n",
)[['tweet_text', 'tvt2', 'label']]
print(data.shape)
data.head()

(742, 3)


Unnamed: 0,tweet_text,tvt2,label
0,rip elly may clampett: so sad to learn #beverl...,training,True
1,just in: missing afghan soldiers found trying ...,training,True
2,#riphulkhogan my heart is ripping like your sh...,training,False
3,a chick-fil-a manager allegedly banned this hi...,validation,False
4,islamic tribunal using sharia law in texas has...,validation,False


In [2]:
# Alias, not a copy: `combined_data` and `data` refer to the same DataFrame object.
combined_data = data

In [3]:
import torch

class CustomTextDataset(torch.utils.data.dataset.Dataset):
    """Map-style dataset pairing raw texts with labels and, optionally, encodings.

    Until `tokenize` is called, the "attention_mask" and "input_ids" entries
    of every sample are None.

    Args:
        texts: Indexable sequence of input strings.
        labels: Indexable sequence of labels, aligned with `texts` by position.
    """

    def __init__(self, texts, labels):
        self.labels = labels
        self.texts = texts
        # Filled in by `tokenize`; None means "not tokenized yet".
        self.attention_mask = None
        self.input_ids = None
        self.token_type_ids = None

    def __len__(self):
        """Return the number of samples (the number of labels)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample `idx` as a dict.

        Keys are "text", "label", "attention_mask" and "input_ids"; the last
        two are None while the dataset has not been tokenized.
        """
        # Test against None explicitly instead of relying on truthiness:
        # truthiness raises for multi-element tensors/arrays and would treat
        # an empty container as "not tokenized".
        attention_mask = (
            self.attention_mask[idx] if self.attention_mask is not None else None
        )
        input_ids = self.input_ids[idx] if self.input_ids is not None else None
        return {
            "text": self.texts[idx],
            "label": self.labels[idx],
            "attention_mask": attention_mask,
            "input_ids": input_ids,
        }

    def tokenize(self, tokenizer):
        """Encode every text and store the per-sample encodings on the dataset.

        Args:
            tokenizer: Callable invoked as
                `tokenizer(text, padding="max_length", truncation=True)` and
                returning a mapping with "attention_mask" and "input_ids".
        """
        self.attention_mask = []
        self.input_ids = []
        # Reset but intentionally left empty: token type ids are not collected.
        self.token_type_ids = []

        for text in self.texts:
            encoded = tokenizer(text, padding="max_length", truncation=True)
            self.attention_mask.append(encoded['attention_mask'])
            self.input_ids.append(encoded['input_ids'])

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Distinct label values in order of first appearance; a value's position in
# this list is its integer class id. In the recorded run the first rows are
# True, True, False and the ids start 0, 0, 1 - i.e. True -> 0, False -> 1.
labels_str = combined_data['label'].unique().tolist()

# One integer class id per row, in row order.
labels = [labels_str.index(value) for value in combined_data['label']]

print(len(labels))
labels[:10]

742


[0, 0, 1, 1, 1, 1, 1, 1, 0, 1]

In [5]:
# Pair each row's text and split tag with its label *by position*. `labels`
# is a plain list built in row order, so indexing it with the DataFrame's
# index labels (as `iterrows()` yields) is only correct for a default
# RangeIndex; zipping the columns is correct for any index and walks the
# frame once instead of four times.
split_rows = list(zip(combined_data['tweet_text'], combined_data['tvt2'], labels))

train_dataset = CustomTextDataset(
    [text for text, split, _ in split_rows if split == 'training'],
    [label for _, split, label in split_rows if split == 'training'])
# Despite its name, `test_dataset` holds the rows tagged 'validation'.
test_dataset = CustomTextDataset(
    [text for text, split, _ in split_rows if split == 'validation'],
    [label for _, split, label in split_rows if split == 'validation'])
train_dataset[0]

{'text': 'rip elly may clampett: so sad to learn #beverlyhillbillies star donna douglas has passed away. URL\r',
 'label': 0,
 'attention_mask': None,
 'input_ids': None}

In [6]:
from transformers import AutoTokenizer

# Load the tokenizer for the checkpoint named by `transformer_name`.
tokenizer = AutoTokenizer.from_pretrained(transformer_name)

In [7]:
# inputs = tokenizer(["you're stuck in a timewrap from 2004 though", "summa lumma dumma lumma"], padding="max_length", truncation=True)
# for k,v in inputs.items():
#     print(k)

In [8]:
def tokenize_function(examples):
    """Tokenize `examples["text"]` with the module-level `tokenizer`.

    The tokenizer is called with padding="max_length" and truncation=True
    and its result is returned unchanged.

    NOTE(review): nothing in the visible cells calls this helper.
    """
    texts = examples["text"]
    return tokenizer(texts, padding="max_length", truncation=True)

# Populate `attention_mask` / `input_ids` on both datasets in place, so that
# indexing them no longer returns None for those keys.
train_dataset.tokenize(tokenizer)
test_dataset.tokenize(tokenizer)

In [9]:
# Sizes of the 'training' and 'validation' splits.
# NOTE(review): the recorded run prints 497 and 152, which sum to 649 of the
# 742 loaded rows; the remaining rows presumably carry another `tvt2` value
# that is not used in this notebook - confirm.
print(len(train_dataset))
print(len(test_dataset))

497
152


### Fine Tuning

In [10]:
from transformers import AutoModelForSequenceClassification

# Load the pretrained checkpoint with a 2-class sequence-classification head.
# The recorded output below reports that the classifier weights are newly
# initialized, i.e. they are learned during fine-tuning.
model = AutoModelForSequenceClassification.from_pretrained(transformer_name,
                                                           output_hidden_states=False,
                                                           num_labels=2)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
from transformers import TrainingArguments

import math

epochs = 40
batch_size = 8
# Checkpoint once, at the final optimizer step: steps-per-epoch * epochs.
# Steps per epoch is the ceiling of len / batch_size. `round(x + 0.5)` is not
# a ceiling: round() rounds halves to even, so an exact odd quotient such as
# 504 / 8 = 63 would become 64, pushing save_steps past the last step so that
# no checkpoint is ever written.
# NOTE(review): assumes one device and no gradient accumulation - consistent
# with the 2520 total steps shown in the recorded training output.
save_steps = math.ceil(len(train_dataset) / batch_size) * epochs

training_args = TrainingArguments(
    output_dir=f"../../data/models/{finetuned_dirname}",
    num_train_epochs=epochs,
    save_steps=save_steps,
    # NOTE(review): presumably ignored because logging_strategy is "epoch" - confirm.
    logging_steps=300,
    learning_rate=1e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    logging_strategy="epoch"
)

print(f"Save Steps : {save_steps}")

Save Steps : 2520


In [12]:
import numpy as np
from datasets import load_metric

# Accuracy metric object used by `compute_metrics`.
# NOTE(review): the recorded cell output echoes this line, which looks like a
# deprecation warning for `load_metric` - confirm against the installed
# `datasets` version before upgrading.
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Score a batch of evaluation predictions with the module-level `metric`.

    Args:
        eval_pred: A pair `(logits, labels)`.

    Returns:
        Whatever `metric.compute` returns for the arg-max class of each row
        of `logits` compared against `labels`.
    """
    logits, labels = eval_pred
    # The predicted class is the index of the largest logit along the last axis.
    predicted_classes = np.argmax(logits, axis=-1)
    return metric.compute(references=labels, predictions=predicted_classes)

  metric = load_metric("accuracy")


In [13]:
from transformers import Trainer

# Wire the model, arguments, datasets and metric callback together.
# `eval_dataset` receives `test_dataset`, which was built from the rows
# tagged 'validation'.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

In [14]:
import time

# Use perf_counter() for the elapsed-time measurement: unlike time.time() it
# is not affected by system clock adjustments during the (long) training run.
start = time.perf_counter()

trainer.train()

print(f"Execution Time : {round(time.perf_counter() - start)} seconds")

  2%|▎         | 63/2520 [00:21<12:21,  3.31it/s] 

{'loss': 0.6914, 'learning_rate': 9.75e-06, 'epoch': 1.0}


                                                 
  2%|▎         | 63/2520 [00:22<12:21,  3.31it/s]

{'eval_loss': 0.6670586466789246, 'eval_accuracy': 0.5592105263157895, 'eval_runtime': 1.7797, 'eval_samples_per_second': 85.409, 'eval_steps_per_second': 10.676, 'epoch': 1.0}


  5%|▌         | 126/2520 [00:41<12:01,  3.32it/s]

{'loss': 0.4496, 'learning_rate': 9.5e-06, 'epoch': 2.0}


                                                  
  5%|▌         | 126/2520 [00:43<12:01,  3.32it/s]

{'eval_loss': 0.40307289361953735, 'eval_accuracy': 0.8157894736842105, 'eval_runtime': 1.7382, 'eval_samples_per_second': 87.447, 'eval_steps_per_second': 10.931, 'epoch': 2.0}


  8%|▊         | 189/2520 [01:02<11:47,  3.29it/s]

{'loss': 0.1951, 'learning_rate': 9.250000000000001e-06, 'epoch': 3.0}


                                                  
  8%|▊         | 189/2520 [01:03<11:47,  3.29it/s]

{'eval_loss': 0.48536452651023865, 'eval_accuracy': 0.8421052631578947, 'eval_runtime': 1.7442, 'eval_samples_per_second': 87.144, 'eval_steps_per_second': 10.893, 'epoch': 3.0}


 10%|█         | 252/2520 [01:22<11:31,  3.28it/s]

{'loss': 0.1151, 'learning_rate': 9e-06, 'epoch': 4.0}


                                                  
 10%|█         | 252/2520 [01:24<11:31,  3.28it/s]

{'eval_loss': 0.3552047312259674, 'eval_accuracy': 0.9013157894736842, 'eval_runtime': 1.7445, 'eval_samples_per_second': 87.131, 'eval_steps_per_second': 10.891, 'epoch': 4.0}


 12%|█▎        | 315/2520 [01:43<11:09,  3.29it/s]

{'loss': 0.037, 'learning_rate': 8.750000000000001e-06, 'epoch': 5.0}


                                                  
 12%|█▎        | 315/2520 [01:45<11:09,  3.29it/s]

{'eval_loss': 0.6507043838500977, 'eval_accuracy': 0.875, 'eval_runtime': 1.7419, 'eval_samples_per_second': 87.262, 'eval_steps_per_second': 10.908, 'epoch': 5.0}


 15%|█▌        | 378/2520 [02:04<10:54,  3.27it/s]

{'loss': 0.0156, 'learning_rate': 8.5e-06, 'epoch': 6.0}


                                                  
 15%|█▌        | 378/2520 [02:05<10:54,  3.27it/s]

{'eval_loss': 0.6680110692977905, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 1.7438, 'eval_samples_per_second': 87.166, 'eval_steps_per_second': 10.896, 'epoch': 6.0}


 18%|█▊        | 441/2520 [02:24<10:35,  3.27it/s]

{'loss': 0.0074, 'learning_rate': 8.25e-06, 'epoch': 7.0}


                                                  
 18%|█▊        | 441/2520 [02:26<10:35,  3.27it/s]

{'eval_loss': 0.7135462760925293, 'eval_accuracy': 0.875, 'eval_runtime': 1.7412, 'eval_samples_per_second': 87.294, 'eval_steps_per_second': 10.912, 'epoch': 7.0}


 20%|██        | 504/2520 [02:45<10:10,  3.30it/s]

{'loss': 0.0005, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.0}


                                                  
 20%|██        | 504/2520 [02:47<10:10,  3.30it/s]

{'eval_loss': 0.7862262725830078, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 1.7616, 'eval_samples_per_second': 86.286, 'eval_steps_per_second': 10.786, 'epoch': 8.0}


 22%|██▎       | 567/2520 [03:06<09:54,  3.28it/s]

{'loss': 0.0146, 'learning_rate': 7.75e-06, 'epoch': 9.0}


                                                  
 22%|██▎       | 567/2520 [03:08<09:54,  3.28it/s]

{'eval_loss': 0.6094309687614441, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 1.7481, 'eval_samples_per_second': 86.95, 'eval_steps_per_second': 10.869, 'epoch': 9.0}


 25%|██▌       | 630/2520 [03:27<09:32,  3.30it/s]

{'loss': 0.0002, 'learning_rate': 7.500000000000001e-06, 'epoch': 10.0}


                                                  
 25%|██▌       | 630/2520 [03:28<09:32,  3.30it/s]

{'eval_loss': 0.771842360496521, 'eval_accuracy': 0.8881578947368421, 'eval_runtime': 1.7438, 'eval_samples_per_second': 87.164, 'eval_steps_per_second': 10.896, 'epoch': 10.0}


 28%|██▊       | 693/2520 [03:47<09:17,  3.28it/s]

{'loss': 0.0002, 'learning_rate': 7.25e-06, 'epoch': 11.0}


                                                  
 28%|██▊       | 693/2520 [03:49<09:17,  3.28it/s]

{'eval_loss': 0.5988942980766296, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.7481, 'eval_samples_per_second': 86.953, 'eval_steps_per_second': 10.869, 'epoch': 11.0}


 30%|███       | 756/2520 [04:08<08:53,  3.31it/s]

{'loss': 0.0006, 'learning_rate': 7e-06, 'epoch': 12.0}


                                                  
 30%|███       | 756/2520 [04:10<08:53,  3.31it/s]

{'eval_loss': 0.8195921182632446, 'eval_accuracy': 0.875, 'eval_runtime': 1.7465, 'eval_samples_per_second': 87.033, 'eval_steps_per_second': 10.879, 'epoch': 12.0}


 32%|███▎      | 819/2520 [04:29<08:38,  3.28it/s]

{'loss': 0.0001, 'learning_rate': 6.750000000000001e-06, 'epoch': 13.0}


                                                  
 32%|███▎      | 819/2520 [04:30<08:38,  3.28it/s]

{'eval_loss': 0.7822567224502563, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.7533, 'eval_samples_per_second': 86.691, 'eval_steps_per_second': 10.836, 'epoch': 13.0}


 35%|███▌      | 882/2520 [04:49<08:19,  3.28it/s]

{'loss': 0.0001, 'learning_rate': 6.5000000000000004e-06, 'epoch': 14.0}


                                                  
 35%|███▌      | 882/2520 [04:51<08:19,  3.28it/s]

{'eval_loss': 0.7547312378883362, 'eval_accuracy': 0.9013157894736842, 'eval_runtime': 1.7602, 'eval_samples_per_second': 86.352, 'eval_steps_per_second': 10.794, 'epoch': 14.0}


 38%|███▊      | 945/2520 [05:10<08:02,  3.26it/s]

{'loss': 0.0001, 'learning_rate': 6.25e-06, 'epoch': 15.0}


                                                  
 38%|███▊      | 945/2520 [05:12<08:02,  3.26it/s]

{'eval_loss': 0.7724812030792236, 'eval_accuracy': 0.9013157894736842, 'eval_runtime': 1.7528, 'eval_samples_per_second': 86.72, 'eval_steps_per_second': 10.84, 'epoch': 15.0}


 40%|████      | 1008/2520 [05:31<07:38,  3.30it/s]

{'loss': 0.0156, 'learning_rate': 6e-06, 'epoch': 16.0}


                                                   
 40%|████      | 1008/2520 [05:33<07:38,  3.30it/s]

{'eval_loss': 0.5516278743743896, 'eval_accuracy': 0.9078947368421053, 'eval_runtime': 1.749, 'eval_samples_per_second': 86.905, 'eval_steps_per_second': 10.863, 'epoch': 16.0}


 42%|████▎     | 1071/2520 [05:52<07:20,  3.29it/s]

{'loss': 0.0173, 'learning_rate': 5.75e-06, 'epoch': 17.0}


                                                   
 42%|████▎     | 1071/2520 [05:53<07:20,  3.29it/s]

{'eval_loss': 0.5406667590141296, 'eval_accuracy': 0.9144736842105263, 'eval_runtime': 1.7556, 'eval_samples_per_second': 86.579, 'eval_steps_per_second': 10.822, 'epoch': 17.0}


 45%|████▌     | 1134/2520 [06:12<07:04,  3.27it/s]

{'loss': 0.0091, 'learning_rate': 5.500000000000001e-06, 'epoch': 18.0}


                                                   
 45%|████▌     | 1134/2520 [06:14<07:04,  3.27it/s]

{'eval_loss': 0.9921433329582214, 'eval_accuracy': 0.875, 'eval_runtime': 1.7472, 'eval_samples_per_second': 86.994, 'eval_steps_per_second': 10.874, 'epoch': 18.0}


 48%|████▊     | 1197/2520 [06:33<06:41,  3.29it/s]

{'loss': 0.0002, 'learning_rate': 5.2500000000000006e-06, 'epoch': 19.0}


                                                   
 48%|████▊     | 1197/2520 [06:35<06:41,  3.29it/s]

{'eval_loss': 0.6838080883026123, 'eval_accuracy': 0.8881578947368421, 'eval_runtime': 1.7528, 'eval_samples_per_second': 86.721, 'eval_steps_per_second': 10.84, 'epoch': 19.0}


 50%|█████     | 1260/2520 [06:54<06:26,  3.26it/s]

{'loss': 0.0001, 'learning_rate': 5e-06, 'epoch': 20.0}


                                                   
 50%|█████     | 1260/2520 [06:56<06:26,  3.26it/s]

{'eval_loss': 0.6938369870185852, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 1.76, 'eval_samples_per_second': 86.363, 'eval_steps_per_second': 10.795, 'epoch': 20.0}


 52%|█████▎    | 1323/2520 [07:15<06:03,  3.29it/s]

{'loss': 0.0001, 'learning_rate': 4.75e-06, 'epoch': 21.0}


                                                   
 52%|█████▎    | 1323/2520 [07:17<06:03,  3.29it/s]

{'eval_loss': 0.8676952719688416, 'eval_accuracy': 0.8881578947368421, 'eval_runtime': 1.7607, 'eval_samples_per_second': 86.33, 'eval_steps_per_second': 10.791, 'epoch': 21.0}


 55%|█████▌    | 1386/2520 [07:36<05:47,  3.26it/s]

{'loss': 0.0001, 'learning_rate': 4.5e-06, 'epoch': 22.0}


                                                   
 55%|█████▌    | 1386/2520 [07:37<05:47,  3.26it/s]

{'eval_loss': 0.7898342609405518, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.7534, 'eval_samples_per_second': 86.689, 'eval_steps_per_second': 10.836, 'epoch': 22.0}


 57%|█████▊    | 1449/2520 [07:56<05:26,  3.28it/s]

{'loss': 0.0001, 'learning_rate': 4.25e-06, 'epoch': 23.0}


                                                   
 57%|█████▊    | 1449/2520 [07:58<05:26,  3.28it/s]

{'eval_loss': 0.7030028104782104, 'eval_accuracy': 0.8881578947368421, 'eval_runtime': 1.7595, 'eval_samples_per_second': 86.386, 'eval_steps_per_second': 10.798, 'epoch': 23.0}


 60%|██████    | 1512/2520 [08:17<05:09,  3.25it/s]

{'loss': 0.0001, 'learning_rate': 4.000000000000001e-06, 'epoch': 24.0}


                                                   
 60%|██████    | 1512/2520 [08:19<05:09,  3.25it/s]

{'eval_loss': 0.6820435523986816, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 1.7562, 'eval_samples_per_second': 86.55, 'eval_steps_per_second': 10.819, 'epoch': 24.0}


 62%|██████▎   | 1575/2520 [08:38<04:48,  3.28it/s]

{'loss': 0.0001, 'learning_rate': 3.7500000000000005e-06, 'epoch': 25.0}


                                                   
 62%|██████▎   | 1575/2520 [08:40<04:48,  3.28it/s]

{'eval_loss': 0.6808127164840698, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.7541, 'eval_samples_per_second': 86.656, 'eval_steps_per_second': 10.832, 'epoch': 25.0}


 65%|██████▌   | 1638/2520 [08:59<04:28,  3.29it/s]

{'loss': 0.0001, 'learning_rate': 3.5e-06, 'epoch': 26.0}


                                                   
 65%|██████▌   | 1638/2520 [09:00<04:28,  3.29it/s]

{'eval_loss': 0.9415512084960938, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 1.7512, 'eval_samples_per_second': 86.798, 'eval_steps_per_second': 10.85, 'epoch': 26.0}


 68%|██████▊   | 1701/2520 [09:20<04:13,  3.23it/s]

{'loss': 0.0001, 'learning_rate': 3.2500000000000002e-06, 'epoch': 27.0}


                                                   
 68%|██████▊   | 1701/2520 [09:21<04:13,  3.23it/s]

{'eval_loss': 0.7085499167442322, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.754, 'eval_samples_per_second': 86.66, 'eval_steps_per_second': 10.832, 'epoch': 27.0}


 70%|███████   | 1764/2520 [09:40<03:51,  3.27it/s]

{'loss': 0.0001, 'learning_rate': 3e-06, 'epoch': 28.0}


                                                   
 70%|███████   | 1764/2520 [09:42<03:51,  3.27it/s]

{'eval_loss': 0.7113043069839478, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.7481, 'eval_samples_per_second': 86.952, 'eval_steps_per_second': 10.869, 'epoch': 28.0}


 72%|███████▎  | 1827/2520 [10:01<03:31,  3.27it/s]

{'loss': 0.0001, 'learning_rate': 2.7500000000000004e-06, 'epoch': 29.0}


                                                   
 72%|███████▎  | 1827/2520 [10:03<03:31,  3.27it/s]

{'eval_loss': 0.7143552303314209, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.7469, 'eval_samples_per_second': 87.011, 'eval_steps_per_second': 10.876, 'epoch': 29.0}


 75%|███████▌  | 1890/2520 [10:22<03:14,  3.25it/s]

{'loss': 0.0, 'learning_rate': 2.5e-06, 'epoch': 30.0}


                                                   
 75%|███████▌  | 1890/2520 [10:24<03:14,  3.25it/s]

{'eval_loss': 0.7203847765922546, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.7433, 'eval_samples_per_second': 87.19, 'eval_steps_per_second': 10.899, 'epoch': 30.0}


 78%|███████▊  | 1953/2520 [10:43<02:52,  3.29it/s]

{'loss': 0.0, 'learning_rate': 2.25e-06, 'epoch': 31.0}


                                                   
 78%|███████▊  | 1953/2520 [10:44<02:52,  3.29it/s]

{'eval_loss': 0.7215164303779602, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.7494, 'eval_samples_per_second': 86.886, 'eval_steps_per_second': 10.861, 'epoch': 31.0}


 80%|████████  | 2016/2520 [11:03<02:34,  3.25it/s]

{'loss': 0.0, 'learning_rate': 2.0000000000000003e-06, 'epoch': 32.0}


                                                   
 80%|████████  | 2016/2520 [11:05<02:34,  3.25it/s]

{'eval_loss': 0.722625732421875, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.75, 'eval_samples_per_second': 86.858, 'eval_steps_per_second': 10.857, 'epoch': 32.0}


 82%|████████▎ | 2079/2520 [11:24<02:15,  3.26it/s]

{'loss': 0.0001, 'learning_rate': 1.75e-06, 'epoch': 33.0}


                                                   
 82%|████████▎ | 2079/2520 [11:26<02:15,  3.26it/s]

{'eval_loss': 1.0744044780731201, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 1.7491, 'eval_samples_per_second': 86.904, 'eval_steps_per_second': 10.863, 'epoch': 33.0}


 85%|████████▌ | 2142/2520 [11:45<01:55,  3.28it/s]

{'loss': 0.0, 'learning_rate': 1.5e-06, 'epoch': 34.0}


                                                   
 85%|████████▌ | 2142/2520 [11:47<01:55,  3.28it/s]

{'eval_loss': 1.0483185052871704, 'eval_accuracy': 0.875, 'eval_runtime': 1.7552, 'eval_samples_per_second': 86.601, 'eval_steps_per_second': 10.825, 'epoch': 34.0}


 88%|████████▊ | 2205/2520 [12:05<01:35,  3.30it/s]

{'loss': 0.0, 'learning_rate': 1.25e-06, 'epoch': 35.0}


                                                   
 88%|████████▊ | 2205/2520 [12:07<01:35,  3.30it/s]

{'eval_loss': 0.8915978670120239, 'eval_accuracy': 0.8881578947368421, 'eval_runtime': 1.7497, 'eval_samples_per_second': 86.87, 'eval_steps_per_second': 10.859, 'epoch': 35.0}


 90%|█████████ | 2268/2520 [12:26<01:16,  3.29it/s]

{'loss': 0.0001, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


                                                   
 90%|█████████ | 2268/2520 [12:28<01:16,  3.29it/s]

{'eval_loss': 0.7662180662155151, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.7461, 'eval_samples_per_second': 87.053, 'eval_steps_per_second': 10.882, 'epoch': 36.0}


 92%|█████████▎| 2331/2520 [12:47<00:57,  3.28it/s]

{'loss': 0.0, 'learning_rate': 7.5e-07, 'epoch': 37.0}


                                                   
 92%|█████████▎| 2331/2520 [12:49<00:57,  3.28it/s]

{'eval_loss': 0.6647035479545593, 'eval_accuracy': 0.9013157894736842, 'eval_runtime': 1.7495, 'eval_samples_per_second': 86.882, 'eval_steps_per_second': 10.86, 'epoch': 37.0}


 95%|█████████▌| 2394/2520 [13:08<00:38,  3.25it/s]

{'loss': 0.0, 'learning_rate': 5.000000000000001e-07, 'epoch': 38.0}


                                                   
 95%|█████████▌| 2394/2520 [13:09<00:38,  3.25it/s]

{'eval_loss': 0.6797041893005371, 'eval_accuracy': 0.9013157894736842, 'eval_runtime': 1.75, 'eval_samples_per_second': 86.856, 'eval_steps_per_second': 10.857, 'epoch': 38.0}


 98%|█████████▊| 2457/2520 [13:28<00:19,  3.28it/s]

{'loss': 0.0, 'learning_rate': 2.5000000000000004e-07, 'epoch': 39.0}


                                                   
 98%|█████████▊| 2457/2520 [13:30<00:19,  3.28it/s]

{'eval_loss': 0.7072192430496216, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.7421, 'eval_samples_per_second': 87.253, 'eval_steps_per_second': 10.907, 'epoch': 39.0}


100%|██████████| 2520/2520 [13:51<00:00,  3.27it/s]

{'loss': 0.0, 'learning_rate': 0.0, 'epoch': 40.0}


                                                   
100%|██████████| 2520/2520 [13:52<00:00,  3.03it/s]

{'eval_loss': 0.7068711519241333, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 1.7408, 'eval_samples_per_second': 87.316, 'eval_steps_per_second': 10.915, 'epoch': 40.0}
{'train_runtime': 832.9727, 'train_samples_per_second': 23.866, 'train_steps_per_second': 3.025, 'train_loss': 0.0392798810243784, 'epoch': 40.0}
Execution Time : 833 seconds





In [15]:
# Evaluate the trained model once more; the recorded metrics match the
# epoch-40 evaluation logged during training.
trainer.evaluate()

100%|██████████| 19/19 [00:01<00:00, 11.48it/s]


{'eval_loss': 0.7068711519241333,
 'eval_accuracy': 0.8947368421052632,
 'eval_runtime': 1.7483,
 'eval_samples_per_second': 86.94,
 'eval_steps_per_second': 10.868,
 'epoch': 40.0}

## 