In [1]:
import pandas as pd

finetuned_dirname = "40-epoch-roberta-finetuned-twitter15-rnr"
transformer_name = "roberta-base"

data = pd.read_csv("../../data/processed/twitter15_dataset_with_tvt.csv", lineterminator="\n")
data = data[['tweet_text', 'tvt2', 'label']]
print(data.shape)
data.head()

(1490, 3)


Unnamed: 0,tweet_text,tvt2,label
0,🔥ca kkk grand wizard 🔥 endorses @hillaryclinto...,training,unverified
1,an open letter to trump voters from his top st...,training,unverified
2,america is a nation of second chances —@potus ...,validation,non-rumor
3,"brandon marshall visits and offers advice, sup...",testting,non-rumor
4,rip elly may clampett: so sad to learn #beverl...,validation,true


In [2]:
combined_data = data

In [3]:
import torch

class CustomTextDataset(torch.utils.data.dataset.Dataset):

    def __init__(self, texts, labels):
        self.labels = labels
        self.texts = texts
        self.attention_mask = None
        self.input_ids = None
        self.token_type_ids = None

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {
            "text": self.texts[idx],
            "label": self.labels[idx],
            "attention_mask": self.attention_mask[idx] if self.attention_mask else None,
            "input_ids": self.input_ids[idx] if self.input_ids else None,
#             "token_type_ids": self.token_type_ids[idx] if self.token_type_ids else None
        }
        return sample
    
    def tokenize(self, tokenizer):
        self.attention_mask = []
        self.input_ids = []
        self.token_type_ids = []

        for text in self.texts:
            token = tokenizer(text, padding="max_length", truncation=True)
            
            self.attention_mask.append(token['attention_mask'])
            self.input_ids.append(token['input_ids'])
#             self.token_type_ids.append(token['token_type_ids'])

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
labels = []

labels_str = combined_data['label'].unique().tolist()
for i, d in combined_data.iterrows():
#     lab = labels_str.index(d['label'])
    if d['label'] in ['true', 'false', 'unverified']:
        lab = 0
    else:
        lab = 1
    labels.append(lab)
    
print(len(labels))
labels[:10]

1490


[0, 0, 1, 1, 0, 1, 0, 0, 0, 0]

In [5]:
train_dataset = CustomTextDataset(
    [d['tweet_text'] for i, d in combined_data.iterrows() if d['tvt2'] == 'training'],
    [labels[i] for i, d in combined_data.iterrows() if d['tvt2'] == 'training'])
test_dataset = CustomTextDataset(
    [d['tweet_text'] for i, d in combined_data.iterrows() if d['tvt2'] == 'validation'],
    [labels[i] for i, d in combined_data.iterrows() if d['tvt2'] == 'validation'])
train_dataset[0]

{'text': '🔥ca kkk grand wizard 🔥 endorses @hillaryclinton #neverhillary #trump2016 URL\r',
 'label': 0,
 'attention_mask': None,
 'input_ids': None}

In [6]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(transformer_name)

In [7]:
# inputs = tokenizer(["you're stuck in a timewrap from 2004 though", "summa lumma dumma lumma"], padding="max_length", truncation=True)
# for k,v in inputs.items():
#     print(k)

In [8]:
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

train_dataset.tokenize(tokenizer)
test_dataset.tokenize(tokenizer)

In [9]:
print(len(train_dataset))
print(len(test_dataset))

1024
338


### Fine Tuning

In [10]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(transformer_name,
                                                           output_hidden_states=False,
                                                           num_labels=2)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
from transformers import TrainingArguments

epochs = 40
batch_size = 8
save_steps = (round((len(train_dataset)/batch_size) + 0.5)) * epochs
# save_steps = 1_000_000

training_args = TrainingArguments(
    output_dir=f"../../data/models/{finetuned_dirname}",
    num_train_epochs=epochs,
    save_steps=save_steps,
    logging_steps=300,
    learning_rate=1e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    logging_strategy="epoch"
)

print(f"Save Steps : {save_steps}")

Save Steps : 5120


In [12]:
import numpy as np
from datasets import load_metric

metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

  metric = load_metric("accuracy")


In [13]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

In [14]:
import time

start = time.time()

trainer.train()

print(f"Execution Time : {round(time.time() - start)} seconds")

  2%|▎         | 128/5120 [00:40<25:00,  3.33it/s]

{'loss': 0.5442, 'learning_rate': 9.75e-06, 'epoch': 1.0}


                                                  
  2%|▎         | 128/5120 [00:44<25:00,  3.33it/s]

{'eval_loss': 0.4585843086242676, 'eval_accuracy': 0.7603550295857988, 'eval_runtime': 3.8626, 'eval_samples_per_second': 87.506, 'eval_steps_per_second': 11.132, 'epoch': 1.0}


  5%|▌         | 256/5120 [01:23<24:21,  3.33it/s]  

{'loss': 0.3608, 'learning_rate': 9.5e-06, 'epoch': 2.0}


                                                  
  5%|▌         | 256/5120 [01:27<24:21,  3.33it/s]

{'eval_loss': 0.42430809140205383, 'eval_accuracy': 0.8136094674556213, 'eval_runtime': 3.8393, 'eval_samples_per_second': 88.038, 'eval_steps_per_second': 11.2, 'epoch': 2.0}


  8%|▊         | 384/5120 [02:05<23:50,  3.31it/s]  

{'loss': 0.2365, 'learning_rate': 9.250000000000001e-06, 'epoch': 3.0}


                                                  
  8%|▊         | 384/5120 [02:09<23:50,  3.31it/s]

{'eval_loss': 0.6045680046081543, 'eval_accuracy': 0.8106508875739645, 'eval_runtime': 3.8488, 'eval_samples_per_second': 87.819, 'eval_steps_per_second': 11.172, 'epoch': 3.0}


 10%|█         | 512/5120 [02:48<23:03,  3.33it/s]  

{'loss': 0.1577, 'learning_rate': 9e-06, 'epoch': 4.0}


                                                  
 10%|█         | 512/5120 [02:52<23:03,  3.33it/s]

{'eval_loss': 0.6654254794120789, 'eval_accuracy': 0.8668639053254438, 'eval_runtime': 3.8604, 'eval_samples_per_second': 87.557, 'eval_steps_per_second': 11.139, 'epoch': 4.0}


 12%|█▎        | 640/5120 [03:31<22:42,  3.29it/s]  

{'loss': 0.0778, 'learning_rate': 8.750000000000001e-06, 'epoch': 5.0}


                                                  
 12%|█▎        | 640/5120 [03:35<22:42,  3.29it/s]

{'eval_loss': 0.8650333881378174, 'eval_accuracy': 0.8727810650887574, 'eval_runtime': 3.8652, 'eval_samples_per_second': 87.446, 'eval_steps_per_second': 11.125, 'epoch': 5.0}


 15%|█▌        | 768/5120 [04:14<22:12,  3.27it/s]  

{'loss': 0.0501, 'learning_rate': 8.5e-06, 'epoch': 6.0}


                                                  
 15%|█▌        | 768/5120 [04:18<22:12,  3.27it/s]

{'eval_loss': 0.7849501371383667, 'eval_accuracy': 0.8846153846153846, 'eval_runtime': 3.867, 'eval_samples_per_second': 87.406, 'eval_steps_per_second': 11.12, 'epoch': 6.0}


 18%|█▊        | 896/5120 [04:56<21:11,  3.32it/s]  

{'loss': 0.0063, 'learning_rate': 8.25e-06, 'epoch': 7.0}


                                                  
 18%|█▊        | 896/5120 [05:00<21:11,  3.32it/s]

{'eval_loss': 0.9660314321517944, 'eval_accuracy': 0.8757396449704142, 'eval_runtime': 3.8702, 'eval_samples_per_second': 87.334, 'eval_steps_per_second': 11.111, 'epoch': 7.0}


 20%|██        | 1024/5120 [05:39<20:34,  3.32it/s] 

{'loss': 0.0239, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.0}


                                                   
 20%|██        | 1024/5120 [05:43<20:34,  3.32it/s]

{'eval_loss': 1.020455002784729, 'eval_accuracy': 0.8668639053254438, 'eval_runtime': 3.8695, 'eval_samples_per_second': 87.349, 'eval_steps_per_second': 11.112, 'epoch': 8.0}


 22%|██▎       | 1152/5120 [06:22<20:07,  3.29it/s]  

{'loss': 0.0203, 'learning_rate': 7.75e-06, 'epoch': 9.0}


                                                   
 22%|██▎       | 1152/5120 [06:26<20:07,  3.29it/s]

{'eval_loss': 0.9959328770637512, 'eval_accuracy': 0.8757396449704142, 'eval_runtime': 3.8701, 'eval_samples_per_second': 87.335, 'eval_steps_per_second': 11.111, 'epoch': 9.0}


 25%|██▌       | 1280/5120 [07:05<19:33,  3.27it/s]  

{'loss': 0.0192, 'learning_rate': 7.500000000000001e-06, 'epoch': 10.0}


                                                   
 25%|██▌       | 1280/5120 [07:08<19:33,  3.27it/s]

{'eval_loss': 0.993878960609436, 'eval_accuracy': 0.878698224852071, 'eval_runtime': 3.8732, 'eval_samples_per_second': 87.266, 'eval_steps_per_second': 11.102, 'epoch': 10.0}


 28%|██▊       | 1408/5120 [07:48<18:56,  3.27it/s]  

{'loss': 0.0035, 'learning_rate': 7.25e-06, 'epoch': 11.0}


                                                   
 28%|██▊       | 1408/5120 [07:51<18:56,  3.27it/s]

{'eval_loss': 1.148733139038086, 'eval_accuracy': 0.8727810650887574, 'eval_runtime': 3.8886, 'eval_samples_per_second': 86.92, 'eval_steps_per_second': 11.058, 'epoch': 11.0}


 30%|███       | 1536/5120 [08:31<18:25,  3.24it/s]  

{'loss': 0.0226, 'learning_rate': 7e-06, 'epoch': 12.0}


                                                   
 30%|███       | 1536/5120 [08:35<18:25,  3.24it/s]

{'eval_loss': 1.0494848489761353, 'eval_accuracy': 0.8668639053254438, 'eval_runtime': 3.8823, 'eval_samples_per_second': 87.062, 'eval_steps_per_second': 11.076, 'epoch': 12.0}


 32%|███▎      | 1664/5120 [09:14<17:39,  3.26it/s]  

{'loss': 0.01, 'learning_rate': 6.750000000000001e-06, 'epoch': 13.0}


                                                   
 32%|███▎      | 1664/5120 [09:18<17:39,  3.26it/s]

{'eval_loss': 1.1900442838668823, 'eval_accuracy': 0.8609467455621301, 'eval_runtime': 3.8866, 'eval_samples_per_second': 86.965, 'eval_steps_per_second': 11.064, 'epoch': 13.0}


 35%|███▌      | 1792/5120 [09:57<16:59,  3.26it/s]  

{'loss': 0.0098, 'learning_rate': 6.5000000000000004e-06, 'epoch': 14.0}


                                                   
 35%|███▌      | 1792/5120 [10:01<16:59,  3.26it/s]

{'eval_loss': 1.1389658451080322, 'eval_accuracy': 0.8550295857988166, 'eval_runtime': 3.8813, 'eval_samples_per_second': 87.085, 'eval_steps_per_second': 11.079, 'epoch': 14.0}


 38%|███▊      | 1920/5120 [10:40<16:20,  3.26it/s]  

{'loss': 0.0092, 'learning_rate': 6.25e-06, 'epoch': 15.0}


                                                   
 38%|███▊      | 1920/5120 [10:44<16:20,  3.26it/s]

{'eval_loss': 1.0948587656021118, 'eval_accuracy': 0.8698224852071006, 'eval_runtime': 3.8767, 'eval_samples_per_second': 87.188, 'eval_steps_per_second': 11.092, 'epoch': 15.0}


 40%|████      | 2048/5120 [11:23<15:47,  3.24it/s]  

{'loss': 0.0, 'learning_rate': 6e-06, 'epoch': 16.0}


                                                   
 40%|████      | 2048/5120 [11:27<15:47,  3.24it/s]

{'eval_loss': 1.168662667274475, 'eval_accuracy': 0.8727810650887574, 'eval_runtime': 3.8844, 'eval_samples_per_second': 87.014, 'eval_steps_per_second': 11.07, 'epoch': 16.0}


 42%|████▎     | 2176/5120 [12:06<15:05,  3.25it/s]  

{'loss': 0.0078, 'learning_rate': 5.75e-06, 'epoch': 17.0}


                                                   
 42%|████▎     | 2176/5120 [12:10<15:05,  3.25it/s]

{'eval_loss': 1.1896169185638428, 'eval_accuracy': 0.8698224852071006, 'eval_runtime': 3.8903, 'eval_samples_per_second': 86.883, 'eval_steps_per_second': 11.053, 'epoch': 17.0}


 45%|████▌     | 2304/5120 [12:49<14:16,  3.29it/s]  

{'loss': 0.0001, 'learning_rate': 5.500000000000001e-06, 'epoch': 18.0}


                                                   
 45%|████▌     | 2304/5120 [12:53<14:16,  3.29it/s]

{'eval_loss': 1.210293173789978, 'eval_accuracy': 0.8727810650887574, 'eval_runtime': 3.8865, 'eval_samples_per_second': 86.967, 'eval_steps_per_second': 11.064, 'epoch': 18.0}


 48%|████▊     | 2432/5120 [13:32<13:51,  3.23it/s]  

{'loss': 0.0173, 'learning_rate': 5.2500000000000006e-06, 'epoch': 19.0}


                                                   
 48%|████▊     | 2432/5120 [13:36<13:51,  3.23it/s]

{'eval_loss': 1.2555384635925293, 'eval_accuracy': 0.8698224852071006, 'eval_runtime': 3.8802, 'eval_samples_per_second': 87.108, 'eval_steps_per_second': 11.082, 'epoch': 19.0}


 50%|█████     | 2560/5120 [14:16<13:00,  3.28it/s]  

{'loss': 0.0025, 'learning_rate': 5e-06, 'epoch': 20.0}


                                                   
 50%|█████     | 2560/5120 [14:19<13:00,  3.28it/s]

{'eval_loss': 1.2676868438720703, 'eval_accuracy': 0.8609467455621301, 'eval_runtime': 3.8833, 'eval_samples_per_second': 87.039, 'eval_steps_per_second': 11.073, 'epoch': 20.0}


 52%|█████▎    | 2688/5120 [14:59<12:12,  3.32it/s]  

{'loss': 0.0, 'learning_rate': 4.75e-06, 'epoch': 21.0}


                                                   
 52%|█████▎    | 2688/5120 [15:02<12:12,  3.32it/s]

{'eval_loss': 1.2472997903823853, 'eval_accuracy': 0.863905325443787, 'eval_runtime': 3.8782, 'eval_samples_per_second': 87.153, 'eval_steps_per_second': 11.087, 'epoch': 21.0}


 55%|█████▌    | 2816/5120 [15:42<11:45,  3.26it/s]

{'loss': 0.0295, 'learning_rate': 4.5e-06, 'epoch': 22.0}


                                                   
 55%|█████▌    | 2816/5120 [15:46<11:45,  3.26it/s]

{'eval_loss': 1.2310866117477417, 'eval_accuracy': 0.8579881656804734, 'eval_runtime': 3.8871, 'eval_samples_per_second': 86.954, 'eval_steps_per_second': 11.062, 'epoch': 22.0}


 57%|█████▊    | 2944/5120 [16:25<11:06,  3.26it/s]

{'loss': 0.0151, 'learning_rate': 4.25e-06, 'epoch': 23.0}


                                                   
 57%|█████▊    | 2944/5120 [16:29<11:06,  3.26it/s]

{'eval_loss': 1.1589807271957397, 'eval_accuracy': 0.8609467455621301, 'eval_runtime': 3.8937, 'eval_samples_per_second': 86.806, 'eval_steps_per_second': 11.043, 'epoch': 23.0}


 60%|██████    | 3072/5120 [17:08<10:33,  3.23it/s]

{'loss': 0.0012, 'learning_rate': 4.000000000000001e-06, 'epoch': 24.0}


                                                   
 60%|██████    | 3072/5120 [17:12<10:33,  3.23it/s]

{'eval_loss': 1.261854887008667, 'eval_accuracy': 0.8698224852071006, 'eval_runtime': 3.8837, 'eval_samples_per_second': 87.031, 'eval_steps_per_second': 11.072, 'epoch': 24.0}


 62%|██████▎   | 3200/5120 [17:51<09:45,  3.28it/s]

{'loss': 0.0039, 'learning_rate': 3.7500000000000005e-06, 'epoch': 25.0}


                                                   
 62%|██████▎   | 3200/5120 [17:55<09:45,  3.28it/s]

{'eval_loss': 1.3160068988800049, 'eval_accuracy': 0.878698224852071, 'eval_runtime': 3.8832, 'eval_samples_per_second': 87.041, 'eval_steps_per_second': 11.073, 'epoch': 25.0}


 65%|██████▌   | 3328/5120 [18:34<09:00,  3.31it/s]

{'loss': 0.0, 'learning_rate': 3.5e-06, 'epoch': 26.0}


                                                   
 65%|██████▌   | 3328/5120 [18:38<09:00,  3.31it/s]

{'eval_loss': 1.2889068126678467, 'eval_accuracy': 0.8757396449704142, 'eval_runtime': 3.8817, 'eval_samples_per_second': 87.074, 'eval_steps_per_second': 11.078, 'epoch': 26.0}


 68%|██████▊   | 3456/5120 [19:17<08:32,  3.25it/s]

{'loss': 0.0, 'learning_rate': 3.2500000000000002e-06, 'epoch': 27.0}


                                                   
 68%|██████▊   | 3456/5120 [19:21<08:32,  3.25it/s]

{'eval_loss': 1.263430118560791, 'eval_accuracy': 0.863905325443787, 'eval_runtime': 3.895, 'eval_samples_per_second': 86.777, 'eval_steps_per_second': 11.04, 'epoch': 27.0}


 70%|███████   | 3584/5120 [20:01<07:51,  3.26it/s]

{'loss': 0.0, 'learning_rate': 3e-06, 'epoch': 28.0}


                                                   
 70%|███████   | 3584/5120 [20:05<07:51,  3.26it/s]

{'eval_loss': 1.2725311517715454, 'eval_accuracy': 0.8698224852071006, 'eval_runtime': 3.8886, 'eval_samples_per_second': 86.922, 'eval_steps_per_second': 11.058, 'epoch': 28.0}


 72%|███████▎  | 3712/5120 [20:44<07:11,  3.26it/s]

{'loss': 0.0001, 'learning_rate': 2.7500000000000004e-06, 'epoch': 29.0}


                                                   
 72%|███████▎  | 3712/5120 [20:48<07:11,  3.26it/s]

{'eval_loss': 1.3102093935012817, 'eval_accuracy': 0.878698224852071, 'eval_runtime': 3.8913, 'eval_samples_per_second': 86.861, 'eval_steps_per_second': 11.05, 'epoch': 29.0}


 75%|███████▌  | 3840/5120 [21:27<06:31,  3.27it/s]

{'loss': 0.0177, 'learning_rate': 2.5e-06, 'epoch': 30.0}


                                                   
 75%|███████▌  | 3840/5120 [21:31<06:31,  3.27it/s]

{'eval_loss': 1.226588487625122, 'eval_accuracy': 0.8757396449704142, 'eval_runtime': 3.8882, 'eval_samples_per_second': 86.93, 'eval_steps_per_second': 11.059, 'epoch': 30.0}


 78%|███████▊  | 3968/5120 [22:10<05:52,  3.27it/s]

{'loss': 0.0, 'learning_rate': 2.25e-06, 'epoch': 31.0}


                                                   
 78%|███████▊  | 3968/5120 [22:14<05:52,  3.27it/s]

{'eval_loss': 1.28258216381073, 'eval_accuracy': 0.8757396449704142, 'eval_runtime': 3.8779, 'eval_samples_per_second': 87.16, 'eval_steps_per_second': 11.088, 'epoch': 31.0}


 80%|████████  | 4096/5120 [22:53<05:10,  3.29it/s]

{'loss': 0.0, 'learning_rate': 2.0000000000000003e-06, 'epoch': 32.0}


                                                   
 80%|████████  | 4096/5120 [22:57<05:10,  3.29it/s]

{'eval_loss': 1.3003227710723877, 'eval_accuracy': 0.8757396449704142, 'eval_runtime': 3.8735, 'eval_samples_per_second': 87.259, 'eval_steps_per_second': 11.101, 'epoch': 32.0}


 82%|████████▎ | 4224/5120 [23:36<04:33,  3.28it/s]

{'loss': 0.0, 'learning_rate': 1.75e-06, 'epoch': 33.0}


                                                   
 82%|████████▎ | 4224/5120 [23:40<04:33,  3.28it/s]

{'eval_loss': 1.2972760200500488, 'eval_accuracy': 0.878698224852071, 'eval_runtime': 3.8886, 'eval_samples_per_second': 86.921, 'eval_steps_per_second': 11.058, 'epoch': 33.0}


 85%|████████▌ | 4352/5120 [24:19<03:57,  3.23it/s]

{'loss': 0.0, 'learning_rate': 1.5e-06, 'epoch': 34.0}


                                                   
 85%|████████▌ | 4352/5120 [24:23<03:57,  3.23it/s]

{'eval_loss': 1.3003333806991577, 'eval_accuracy': 0.878698224852071, 'eval_runtime': 3.8855, 'eval_samples_per_second': 86.99, 'eval_steps_per_second': 11.067, 'epoch': 34.0}


 88%|████████▊ | 4480/5120 [25:02<03:16,  3.26it/s]

{'loss': 0.0, 'learning_rate': 1.25e-06, 'epoch': 35.0}


                                                   
 88%|████████▊ | 4480/5120 [25:06<03:16,  3.26it/s]

{'eval_loss': 1.30630624294281, 'eval_accuracy': 0.878698224852071, 'eval_runtime': 3.8832, 'eval_samples_per_second': 87.043, 'eval_steps_per_second': 11.073, 'epoch': 35.0}


 90%|█████████ | 4608/5120 [25:45<02:37,  3.26it/s]

{'loss': 0.0, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


                                                   
 90%|█████████ | 4608/5120 [25:49<02:37,  3.26it/s]

{'eval_loss': 1.3122895956039429, 'eval_accuracy': 0.878698224852071, 'eval_runtime': 3.8796, 'eval_samples_per_second': 87.122, 'eval_steps_per_second': 11.084, 'epoch': 36.0}


 92%|█████████▎| 4736/5120 [26:29<01:58,  3.24it/s]

{'loss': 0.0, 'learning_rate': 7.5e-07, 'epoch': 37.0}


                                                   
 92%|█████████▎| 4736/5120 [26:32<01:58,  3.24it/s]

{'eval_loss': 1.3189833164215088, 'eval_accuracy': 0.878698224852071, 'eval_runtime': 3.8807, 'eval_samples_per_second': 87.097, 'eval_steps_per_second': 11.08, 'epoch': 37.0}


 95%|█████████▌| 4864/5120 [27:11<01:18,  3.25it/s]

{'loss': 0.0, 'learning_rate': 5.000000000000001e-07, 'epoch': 38.0}


                                                   
 95%|█████████▌| 4864/5120 [27:15<01:18,  3.25it/s]

{'eval_loss': 1.3194996118545532, 'eval_accuracy': 0.878698224852071, 'eval_runtime': 3.8813, 'eval_samples_per_second': 87.085, 'eval_steps_per_second': 11.079, 'epoch': 38.0}


 98%|█████████▊| 4992/5120 [27:55<00:39,  3.25it/s]

{'loss': 0.0, 'learning_rate': 2.5000000000000004e-07, 'epoch': 39.0}


                                                   
 98%|█████████▊| 4992/5120 [27:58<00:39,  3.25it/s]

{'eval_loss': 1.3224128484725952, 'eval_accuracy': 0.878698224852071, 'eval_runtime': 3.8717, 'eval_samples_per_second': 87.301, 'eval_steps_per_second': 11.106, 'epoch': 39.0}


100%|██████████| 5120/5120 [28:39<00:00,  3.32it/s]

{'loss': 0.0, 'learning_rate': 0.0, 'epoch': 40.0}


                                                   
100%|██████████| 5120/5120 [28:43<00:00,  2.97it/s]

{'eval_loss': 1.3214057683944702, 'eval_accuracy': 0.878698224852071, 'eval_runtime': 3.8864, 'eval_samples_per_second': 86.97, 'eval_steps_per_second': 11.064, 'epoch': 40.0}
{'train_runtime': 1723.2091, 'train_samples_per_second': 23.77, 'train_steps_per_second': 2.971, 'train_loss': 0.04117766105839564, 'epoch': 40.0}
Execution Time : 1723 seconds





In [15]:
trainer.evaluate()

100%|██████████| 43/43 [00:03<00:00, 11.33it/s]


{'eval_loss': 1.3214057683944702,
 'eval_accuracy': 0.878698224852071,
 'eval_runtime': 3.8893,
 'eval_samples_per_second': 86.906,
 'eval_steps_per_second': 11.056,
 'epoch': 40.0}