In [1]:
import pandas as pd

# Folder name for this run; it is interpolated into the Trainer's output_dir further down.
finetuned_dirname = "40-epoch-roberta-finetuned-phemernr2-rnr-check-finetune"

# Read the tweet CSV. Its 'tvt2' column is what the dataset cell filters on
# ('training' / 'validation') and 'label' is binarised into the target.
dataset_path = "../../data/phemernr2_dataset_with_tvt.csv"
data = pd.read_csv(dataset_path, sep=",")
print(data.shape)
data.head()

(6425, 5)


Unnamed: 0,tweet_id,tweet_text,label,tvt2,tvt2_1
0,552833795142209536,the east london mosque would like to offer its...,non-rumours,training,training
1,580318210609696769,breaking - a germanwings airbus a320 plane rep...,true,validation,testting
2,552798891994009601,reports that two of the dead in the #charliehe...,true,training,training
3,576790814942236672,after #putin disappeared russian tv no longer ...,non-rumours,validation,training
4,499678822598340608,saw #ferguson for myself. #justiceformichaelbr...,non-rumours,testting,testting


In [2]:
# Collapse every rumour sub-class into the single 'rumors' value; other values are left as-is.
# 'unverfied' (sic) is kept for backward compatibility, and the correctly spelled
# 'unverified' is added so the collapse works whichever spelling the CSV actually uses.
# NOTE(review): confirm the spelling present in the data and drop the unused one.
data['label'] = data['label'].replace(['true', 'unverfied', 'unverified', 'false'], 'rumors')

In [3]:
# Alias only (no copy): combined_data and data refer to the same DataFrame object.
combined_data = data

In [4]:
import torch

class CustomTextDataset(torch.utils.data.dataset.Dataset):
    """Map-style dataset pairing raw texts with labels and, once tokenized, their encodings.

    Each item is a dict with the keys ``text``, ``label``, ``attention_mask`` and
    ``input_ids``. The two encoding entries are ``None`` until ``tokenize`` has
    been called.
    """

    def __init__(self, texts, labels):
        """Store the parallel ``texts`` / ``labels`` sequences; no tokenization happens here."""
        self.labels = labels
        self.texts = texts
        # Populated by ``tokenize``; None means "not tokenized yet".
        self.attention_mask = None
        self.input_ids = None
        self.token_type_ids = None

    def __len__(self):
        """Number of samples, taken from the label sequence."""
        return len(self.labels)

    @staticmethod
    def _row_or_none(rows, idx):
        """Return ``rows[idx]``, or None when ``rows`` is missing or empty.

        An explicit None/length check is used instead of truthiness so that
        tensor or ndarray encodings (whose truth value is ambiguous) also work.
        """
        if rows is None or len(rows) == 0:
            return None
        return rows[idx]

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a dict of text, label and encodings."""
        return {
            "text": self.texts[idx],
            "label": self.labels[idx],
            "attention_mask": self._row_or_none(self.attention_mask, idx),
            "input_ids": self._row_or_none(self.input_ids, idx),
        }

    def tokenize(self, tokenizer):
        """Encode every text with ``tokenizer`` and cache the results on the instance.

        ``tokenizer`` is called once per text with ``padding="max_length"`` and
        ``truncation=True`` and must return a mapping that contains
        ``'attention_mask'`` and ``'input_ids'``. Any previously cached
        encodings are discarded first.
        """
        self.attention_mask = []
        self.input_ids = []
        # Reset for completeness; nothing below fills it.
        self.token_type_ids = []

        for text in self.texts:
            encoded = tokenizer(text, padding="max_length", truncation=True)
            self.attention_mask.append(encoded['attention_mask'])
            self.input_ids.append(encoded['input_ids'])

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Distinct label strings present in the frame (kept for inspection).
labels_str = combined_data['label'].unique().tolist()

# Binary target in row order: 1 for "non-rumours", 0 for every other label value.
labels = [
    1 if label_value == "non-rumours" else 0
    for label_value in combined_data['label']
]

print(len(labels))
labels[:10]

6425


[1, 0, 0, 1, 1, 1, 1, 0, 1, 1]

In [6]:
def _build_split(split_name):
    """Build a CustomTextDataset from the rows whose 'tvt2' value equals ``split_name``.

    Texts and labels are selected with one boolean mask and matched by row
    position, so the result does not depend on the DataFrame having a default
    0..n-1 index (``labels`` is a plain list in row order).
    """
    keep = (combined_data['tvt2'] == split_name).to_numpy()
    split_texts = combined_data.loc[keep, 'tweet_text'].tolist()
    split_labels = [lab for lab, selected in zip(labels, keep) if selected]
    return CustomTextDataset(split_texts, split_labels)


train_dataset = _build_split('training')
# Despite its name, test_dataset holds the 'validation' rows.
test_dataset = _build_split('validation')
train_dataset[0]

{'text': 'the east london mosque would like to offer its sincere condolences to the families of those killed during the #charliehebdo attacks (1/2)',
 'label': 1,
 'attention_mask': None,
 'input_ids': None}

In [7]:
from transformers import AutoTokenizer

# Load the tokenizer for the "roberta-base" checkpoint, the same checkpoint name the model cell loads.
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

In [8]:
# inputs = tokenizer(["you're stuck in a timewrap from 2004 though", "summa lumma dumma lumma"], padding="max_length", truncation=True)
# for k,v in inputs.items():
#     print(k)

In [9]:
def tokenize_function(examples):
    """Tokenize ``examples["text"]`` with the notebook-level ``tokenizer``.

    Uses ``padding="max_length"`` and ``truncation=True`` and returns whatever
    the tokenizer returns. Not called anywhere in the visible cells.
    """
    batch_texts = examples["text"]
    encoded = tokenizer(batch_texts, padding="max_length", truncation=True)
    return encoded

# Encode both splits in place (training first, then the evaluation split).
for split_dataset in (train_dataset, test_dataset):
    split_dataset.tokenize(tokenizer)

In [10]:
# Sample counts: training split first, evaluation split second.
for split_dataset in (train_dataset, test_dataset):
    print(len(split_dataset))

4336
1462


### Fine Tuning

In [11]:
from transformers import AutoModelForSequenceClassification

# Two-way sequence classifier on top of the "roberta-base" checkpoint; hidden states are not returned.
model = AutoModelForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=2,
    output_hidden_states=False,
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
from transformers import TrainingArguments

import math

epochs = 40
batch_size = 8

# Batches per epoch is the ceiling of samples / batch size. math.ceil replaces the
# earlier round(x + 0.49) trick, which can round *down* when the fractional part
# is 0.01 or smaller (possible with large batch sizes).
steps_per_epoch = math.ceil(len(train_dataset) / batch_size)

# save_steps spans the whole run (steps per epoch * epochs).
# NOTE(review): presumably intended to checkpoint only once, at the end — confirm.
save_steps = steps_per_epoch * epochs
# save_steps = 1_000_000

training_args = TrainingArguments(
    output_dir=f"../../data/models/{finetuned_dirname}",
    num_train_epochs=epochs,
    save_steps=save_steps,
    # NOTE(review): logging_strategy="epoch" is also set below; per the Transformers
    # docs logging_steps only applies to the "steps" strategy — confirm and drop if unused.
    logging_steps=300,
    learning_rate=1e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    logging_strategy="epoch"
)

print(f"Save Steps : {save_steps}")

Save Steps : 21680


In [13]:
import numpy as np
from datasets import load_metric
# from copy import deepcopy

# Accuracy metric object used by compute_metrics below.
# NOTE(review): the cell output echoes this line as a warning; load_metric is reportedly
# deprecated in newer `datasets` releases in favour of the `evaluate` package — confirm
# against the installed version.
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Score one evaluation pass with the notebook-level ``metric``.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted class is the
    argmax over the last axis of the logits. Returns the result of
    ``metric.compute`` for those predictions against the reference labels.
    """
    raw_scores, gold_labels = eval_pred
    predicted_classes = np.argmax(raw_scores, axis=-1)
    result = metric.compute(predictions=predicted_classes, references=gold_labels)
    return result

# def compute_metrics(eval_pred):
#     metrics = ["accuracy", "recall", "precision", "f1"] #List of metrics to return
#     metric={}
#     for met in metrics:
#        metric[met] = load_metric(met)
#     logits, labels = eval_pred
#     predictions = np.argmax(logits, axis=-1)
#     metric_res={}
#     for met in metrics:
#        metric_res[met]=metric[met].compute(predictions=predictions, references=labels)[met]
#     return metric_res

  metric = load_metric("accuracy")


In [14]:
# class CustomCallback(TrainerCallback):
    
#     def __init__(self, trainer) -> None:
#         super().__init__()
#         self._trainer = trainer
    
#     def on_epoch_end(self, args, state, control, **kwargs):
#         if control.should_evaluate:
#             control_copy = deepcopy(control)
#             self._trainer.evaluate(eval_dataset=self._trainer.train_dataset, metric_key_prefix="train")
#             return control_copy

In [15]:
from transformers import Trainer

# Wire the model, arguments, both splits and the metric callback into a Trainer.
trainer_kwargs = {
    "model": model,
    "args": training_args,
    "train_dataset": train_dataset,
    "eval_dataset": test_dataset,
    "compute_metrics": compute_metrics,
}
trainer = Trainer(**trainer_kwargs)

In [16]:
import time

# Wall-clock timing around the full fine-tuning run.
start = time.time()
train = trainer.train()
elapsed_seconds = round(time.time() - start)

print(f"Execution Time : {elapsed_seconds} seconds")

  2%|▎         | 542/21680 [02:47<1:48:54,  3.24it/s]

{'loss': 0.4826, 'learning_rate': 9.75e-06, 'epoch': 1.0}


                                                     
  2%|▎         | 542/21680 [03:03<1:48:54,  3.24it/s]

{'eval_loss': 0.4852116107940674, 'eval_accuracy': 0.8160054719562243, 'eval_runtime': 16.718, 'eval_samples_per_second': 87.45, 'eval_steps_per_second': 10.946, 'epoch': 1.0}


  5%|▌         | 1084/21680 [05:49<1:45:09,  3.26it/s]

{'loss': 0.375, 'learning_rate': 9.5e-06, 'epoch': 2.0}


                                                      
  5%|▌         | 1084/21680 [06:06<1:45:09,  3.26it/s]

{'eval_loss': 0.36050084233283997, 'eval_accuracy': 0.8700410396716827, 'eval_runtime': 16.7371, 'eval_samples_per_second': 87.351, 'eval_steps_per_second': 10.934, 'epoch': 2.0}


  8%|▊         | 1626/21680 [08:52<1:43:03,  3.24it/s] 

{'loss': 0.2982, 'learning_rate': 9.250000000000001e-06, 'epoch': 3.0}


                                                      
  8%|▊         | 1626/21680 [09:09<1:43:03,  3.24it/s]

{'eval_loss': 0.46748846769332886, 'eval_accuracy': 0.8782489740082079, 'eval_runtime': 16.7437, 'eval_samples_per_second': 87.317, 'eval_steps_per_second': 10.93, 'epoch': 3.0}


 10%|█         | 2168/21680 [11:55<1:39:02,  3.28it/s] 

{'loss': 0.2375, 'learning_rate': 9e-06, 'epoch': 4.0}


                                                      
 10%|█         | 2168/21680 [12:12<1:39:02,  3.28it/s]

{'eval_loss': 0.521999716758728, 'eval_accuracy': 0.884404924760602, 'eval_runtime': 16.7015, 'eval_samples_per_second': 87.537, 'eval_steps_per_second': 10.957, 'epoch': 4.0}


 12%|█▎        | 2710/21680 [14:58<1:37:01,  3.26it/s] 

{'loss': 0.1759, 'learning_rate': 8.750000000000001e-06, 'epoch': 5.0}


                                                      
 12%|█▎        | 2710/21680 [15:15<1:37:01,  3.26it/s]

{'eval_loss': 0.6187268495559692, 'eval_accuracy': 0.8891928864569083, 'eval_runtime': 16.7474, 'eval_samples_per_second': 87.297, 'eval_steps_per_second': 10.927, 'epoch': 5.0}


 15%|█▌        | 3252/21680 [18:02<1:34:30,  3.25it/s] 

{'loss': 0.1207, 'learning_rate': 8.5e-06, 'epoch': 6.0}


                                                      
 15%|█▌        | 3252/21680 [18:18<1:34:30,  3.25it/s]

{'eval_loss': 0.8528735637664795, 'eval_accuracy': 0.8707250341997264, 'eval_runtime': 16.7389, 'eval_samples_per_second': 87.341, 'eval_steps_per_second': 10.933, 'epoch': 6.0}


 18%|█▊        | 3794/21680 [21:05<1:31:59,  3.24it/s] 

{'loss': 0.1212, 'learning_rate': 8.25e-06, 'epoch': 7.0}


                                                      
 18%|█▊        | 3794/21680 [21:22<1:31:59,  3.24it/s]

{'eval_loss': 0.7998347282409668, 'eval_accuracy': 0.8734610123119015, 'eval_runtime': 16.7556, 'eval_samples_per_second': 87.254, 'eval_steps_per_second': 10.922, 'epoch': 7.0}


 20%|██        | 4336/21680 [24:08<1:27:45,  3.29it/s] 

{'loss': 0.0547, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.0}


                                                      
 20%|██        | 4336/21680 [24:25<1:27:45,  3.29it/s]

{'eval_loss': 0.9476599097251892, 'eval_accuracy': 0.8741450068399452, 'eval_runtime': 16.7137, 'eval_samples_per_second': 87.473, 'eval_steps_per_second': 10.949, 'epoch': 8.0}


 22%|██▎       | 4878/21680 [27:10<1:26:34,  3.23it/s] 

{'loss': 0.0615, 'learning_rate': 7.75e-06, 'epoch': 9.0}


                                                      
 22%|██▎       | 4878/21680 [27:27<1:26:34,  3.23it/s]

{'eval_loss': 0.909144937992096, 'eval_accuracy': 0.8796169630642955, 'eval_runtime': 16.7464, 'eval_samples_per_second': 87.303, 'eval_steps_per_second': 10.928, 'epoch': 9.0}


 25%|██▌       | 5420/21680 [30:13<1:22:23,  3.29it/s] 

{'loss': 0.0515, 'learning_rate': 7.500000000000001e-06, 'epoch': 10.0}


                                                      
 25%|██▌       | 5420/21680 [30:30<1:22:23,  3.29it/s]

{'eval_loss': 1.0293669700622559, 'eval_accuracy': 0.8734610123119015, 'eval_runtime': 16.7453, 'eval_samples_per_second': 87.308, 'eval_steps_per_second': 10.928, 'epoch': 10.0}


 28%|██▊       | 5962/21680 [33:17<1:21:09,  3.23it/s] 

{'loss': 0.0376, 'learning_rate': 7.25e-06, 'epoch': 11.0}


                                                      
 28%|██▊       | 5962/21680 [33:33<1:21:09,  3.23it/s]

{'eval_loss': 1.0461368560791016, 'eval_accuracy': 0.8803009575923393, 'eval_runtime': 16.7245, 'eval_samples_per_second': 87.417, 'eval_steps_per_second': 10.942, 'epoch': 11.0}


 30%|███       | 6504/21680 [36:19<1:17:28,  3.26it/s] 

{'loss': 0.0345, 'learning_rate': 7e-06, 'epoch': 12.0}


                                                      
 30%|███       | 6504/21680 [36:36<1:17:28,  3.26it/s]

{'eval_loss': 1.0318363904953003, 'eval_accuracy': 0.8857729138166894, 'eval_runtime': 16.7025, 'eval_samples_per_second': 87.532, 'eval_steps_per_second': 10.956, 'epoch': 12.0}


 32%|███▎      | 7046/21680 [39:22<1:15:04,  3.25it/s] 

{'loss': 0.0398, 'learning_rate': 6.750000000000001e-06, 'epoch': 13.0}


                                                      
 32%|███▎      | 7046/21680 [39:39<1:15:04,  3.25it/s]

{'eval_loss': 0.9783596396446228, 'eval_accuracy': 0.8898768809849521, 'eval_runtime': 16.7542, 'eval_samples_per_second': 87.261, 'eval_steps_per_second': 10.923, 'epoch': 13.0}


 35%|███▌      | 7588/21680 [42:25<1:11:27,  3.29it/s] 

{'loss': 0.0385, 'learning_rate': 6.5000000000000004e-06, 'epoch': 14.0}


                                                      
 35%|███▌      | 7588/21680 [42:41<1:11:27,  3.29it/s]

{'eval_loss': 1.0043928623199463, 'eval_accuracy': 0.8761969904240766, 'eval_runtime': 16.745, 'eval_samples_per_second': 87.31, 'eval_steps_per_second': 10.929, 'epoch': 14.0}


 38%|███▊      | 8130/21680 [45:28<1:09:45,  3.24it/s] 

{'loss': 0.0302, 'learning_rate': 6.25e-06, 'epoch': 15.0}


                                                      
 38%|███▊      | 8130/21680 [45:45<1:09:45,  3.24it/s]

{'eval_loss': 1.063581943511963, 'eval_accuracy': 0.8837209302325582, 'eval_runtime': 16.7197, 'eval_samples_per_second': 87.442, 'eval_steps_per_second': 10.945, 'epoch': 15.0}


 40%|████      | 8672/21680 [48:31<1:06:01,  3.28it/s] 

{'loss': 0.0287, 'learning_rate': 6e-06, 'epoch': 16.0}


                                                      
 40%|████      | 8672/21680 [48:47<1:06:01,  3.28it/s]

{'eval_loss': 1.0883427858352661, 'eval_accuracy': 0.8857729138166894, 'eval_runtime': 16.7355, 'eval_samples_per_second': 87.359, 'eval_steps_per_second': 10.935, 'epoch': 16.0}


 42%|████▎     | 9214/21680 [51:34<1:04:00,  3.25it/s] 

{'loss': 0.0281, 'learning_rate': 5.75e-06, 'epoch': 17.0}


                                                      
 42%|████▎     | 9214/21680 [51:50<1:04:00,  3.25it/s]

{'eval_loss': 1.047033667564392, 'eval_accuracy': 0.8878248974008208, 'eval_runtime': 16.7439, 'eval_samples_per_second': 87.315, 'eval_steps_per_second': 10.929, 'epoch': 17.0}


 45%|████▌     | 9756/21680 [54:36<1:00:34,  3.28it/s] 

{'loss': 0.0355, 'learning_rate': 5.500000000000001e-06, 'epoch': 18.0}


                                                      
 45%|████▌     | 9756/21680 [54:53<1:00:34,  3.28it/s]

{'eval_loss': 0.9572548270225525, 'eval_accuracy': 0.8864569083447332, 'eval_runtime': 16.7557, 'eval_samples_per_second': 87.254, 'eval_steps_per_second': 10.922, 'epoch': 18.0}


 48%|████▊     | 10298/21680 [57:39<58:13,  3.26it/s]  

{'loss': 0.025, 'learning_rate': 5.2500000000000006e-06, 'epoch': 19.0}


                                                     
 48%|████▊     | 10298/21680 [57:56<58:13,  3.26it/s]

{'eval_loss': 1.0920027494430542, 'eval_accuracy': 0.8885088919288646, 'eval_runtime': 16.708, 'eval_samples_per_second': 87.503, 'eval_steps_per_second': 10.953, 'epoch': 19.0}


 50%|█████     | 10840/21680 [1:00:41<55:33,  3.25it/s] 

{'loss': 0.0182, 'learning_rate': 5e-06, 'epoch': 20.0}


                                                       
 50%|█████     | 10840/21680 [1:00:58<55:33,  3.25it/s]

{'eval_loss': 1.0620630979537964, 'eval_accuracy': 0.8926128590971272, 'eval_runtime': 16.754, 'eval_samples_per_second': 87.263, 'eval_steps_per_second': 10.923, 'epoch': 20.0}


 52%|█████▎    | 11382/21680 [1:03:44<52:23,  3.28it/s]   

{'loss': 0.0161, 'learning_rate': 4.75e-06, 'epoch': 21.0}


                                                       
 52%|█████▎    | 11382/21680 [1:04:01<52:23,  3.28it/s]

{'eval_loss': 1.1740738153457642, 'eval_accuracy': 0.8837209302325582, 'eval_runtime': 16.7453, 'eval_samples_per_second': 87.308, 'eval_steps_per_second': 10.928, 'epoch': 21.0}


 55%|█████▌    | 11924/21680 [1:06:47<50:35,  3.21it/s]   

{'loss': 0.0234, 'learning_rate': 4.5e-06, 'epoch': 22.0}


                                                       
 55%|█████▌    | 11924/21680 [1:07:04<50:35,  3.21it/s]

{'eval_loss': 1.0493558645248413, 'eval_accuracy': 0.8789329685362517, 'eval_runtime': 16.7295, 'eval_samples_per_second': 87.39, 'eval_steps_per_second': 10.939, 'epoch': 22.0}


 57%|█████▊    | 12466/21680 [1:09:50<47:16,  3.25it/s]   

{'loss': 0.0097, 'learning_rate': 4.25e-06, 'epoch': 23.0}


                                                       
 57%|█████▊    | 12466/21680 [1:10:06<47:16,  3.25it/s]

{'eval_loss': 1.1084039211273193, 'eval_accuracy': 0.8885088919288646, 'eval_runtime': 16.7158, 'eval_samples_per_second': 87.462, 'eval_steps_per_second': 10.948, 'epoch': 23.0}


 60%|██████    | 13008/21680 [1:12:52<44:36,  3.24it/s]   

{'loss': 0.011, 'learning_rate': 4.000000000000001e-06, 'epoch': 24.0}


                                                       
 60%|██████    | 13008/21680 [1:13:09<44:36,  3.24it/s]

{'eval_loss': 1.2500113248825073, 'eval_accuracy': 0.8837209302325582, 'eval_runtime': 16.749, 'eval_samples_per_second': 87.289, 'eval_steps_per_second': 10.926, 'epoch': 24.0}


 62%|██████▎   | 13550/21680 [1:15:55<41:33,  3.26it/s]   

{'loss': 0.0073, 'learning_rate': 3.7500000000000005e-06, 'epoch': 25.0}


                                                       
 62%|██████▎   | 13550/21680 [1:16:12<41:33,  3.26it/s]

{'eval_loss': 1.2012773752212524, 'eval_accuracy': 0.884404924760602, 'eval_runtime': 16.7525, 'eval_samples_per_second': 87.271, 'eval_steps_per_second': 10.924, 'epoch': 25.0}


 65%|██████▌   | 14092/21680 [1:18:58<38:44,  3.27it/s]   

{'loss': 0.0018, 'learning_rate': 3.5e-06, 'epoch': 26.0}


                                                       
 65%|██████▌   | 14092/21680 [1:19:15<38:44,  3.27it/s]

{'eval_loss': 1.2451566457748413, 'eval_accuracy': 0.8878248974008208, 'eval_runtime': 16.722, 'eval_samples_per_second': 87.43, 'eval_steps_per_second': 10.944, 'epoch': 26.0}


 68%|██████▊   | 14634/21680 [1:22:01<35:53,  3.27it/s]   

{'loss': 0.0061, 'learning_rate': 3.2500000000000002e-06, 'epoch': 27.0}


                                                       
 68%|██████▊   | 14634/21680 [1:22:18<35:53,  3.27it/s]

{'eval_loss': 1.238472819328308, 'eval_accuracy': 0.8837209302325582, 'eval_runtime': 16.7433, 'eval_samples_per_second': 87.318, 'eval_steps_per_second': 10.93, 'epoch': 27.0}


 70%|███████   | 15176/21680 [1:25:04<33:32,  3.23it/s]   

{'loss': 0.0109, 'learning_rate': 3e-06, 'epoch': 28.0}


                                                       
 70%|███████   | 15176/21680 [1:25:21<33:32,  3.23it/s]

{'eval_loss': 1.456903338432312, 'eval_accuracy': 0.8645690834473324, 'eval_runtime': 16.7469, 'eval_samples_per_second': 87.3, 'eval_steps_per_second': 10.927, 'epoch': 28.0}


 72%|███████▎  | 15718/21680 [1:28:07<30:43,  3.23it/s]  

{'loss': 0.0107, 'learning_rate': 2.7500000000000004e-06, 'epoch': 29.0}


                                                       
 72%|███████▎  | 15718/21680 [1:28:24<30:43,  3.23it/s]

{'eval_loss': 1.2260854244232178, 'eval_accuracy': 0.8809849521203831, 'eval_runtime': 16.7641, 'eval_samples_per_second': 87.21, 'eval_steps_per_second': 10.916, 'epoch': 29.0}


 75%|███████▌  | 16260/21680 [1:31:10<27:48,  3.25it/s]  

{'loss': 0.0078, 'learning_rate': 2.5e-06, 'epoch': 30.0}


                                                       
 75%|███████▌  | 16260/21680 [1:31:27<27:48,  3.25it/s]

{'eval_loss': 1.083395004272461, 'eval_accuracy': 0.8980848153214774, 'eval_runtime': 16.7126, 'eval_samples_per_second': 87.479, 'eval_steps_per_second': 10.95, 'epoch': 30.0}


 78%|███████▊  | 16802/21680 [1:34:13<24:51,  3.27it/s]  

{'loss': 0.0065, 'learning_rate': 2.25e-06, 'epoch': 31.0}


                                                       
 78%|███████▊  | 16802/21680 [1:34:30<24:51,  3.27it/s]

{'eval_loss': 1.126112937927246, 'eval_accuracy': 0.8898768809849521, 'eval_runtime': 16.739, 'eval_samples_per_second': 87.341, 'eval_steps_per_second': 10.933, 'epoch': 31.0}


 80%|████████  | 17344/21680 [1:37:16<22:07,  3.27it/s]  

{'loss': 0.0083, 'learning_rate': 2.0000000000000003e-06, 'epoch': 32.0}


                                                       
 80%|████████  | 17344/21680 [1:37:32<22:07,  3.27it/s]

{'eval_loss': 1.1272838115692139, 'eval_accuracy': 0.8885088919288646, 'eval_runtime': 16.746, 'eval_samples_per_second': 87.304, 'eval_steps_per_second': 10.928, 'epoch': 32.0}


 82%|████████▎ | 17886/21680 [1:40:18<19:16,  3.28it/s]  

{'loss': 0.0014, 'learning_rate': 1.75e-06, 'epoch': 33.0}


                                                       
 82%|████████▎ | 17886/21680 [1:40:35<19:16,  3.28it/s]

{'eval_loss': 1.197569489479065, 'eval_accuracy': 0.8885088919288646, 'eval_runtime': 16.7277, 'eval_samples_per_second': 87.4, 'eval_steps_per_second': 10.94, 'epoch': 33.0}


 85%|████████▌ | 18428/21680 [1:43:21<16:47,  3.23it/s]  

{'loss': 0.0038, 'learning_rate': 1.5e-06, 'epoch': 34.0}


                                                       
 85%|████████▌ | 18428/21680 [1:43:38<16:47,  3.23it/s]

{'eval_loss': 1.1712239980697632, 'eval_accuracy': 0.8885088919288646, 'eval_runtime': 16.7195, 'eval_samples_per_second': 87.443, 'eval_steps_per_second': 10.945, 'epoch': 34.0}


 88%|████████▊ | 18970/21680 [1:46:24<13:46,  3.28it/s]  

{'loss': 0.0009, 'learning_rate': 1.25e-06, 'epoch': 35.0}


                                                       
 88%|████████▊ | 18970/21680 [1:46:41<13:46,  3.28it/s]

{'eval_loss': 1.3548312187194824, 'eval_accuracy': 0.8789329685362517, 'eval_runtime': 16.7432, 'eval_samples_per_second': 87.319, 'eval_steps_per_second': 10.93, 'epoch': 35.0}


 90%|█████████ | 19512/21680 [1:49:27<10:58,  3.29it/s]  

{'loss': 0.0064, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


 90%|█████████ | 19512/21680 [1:49:44<10:58,  3.29it/s]

{'eval_loss': 1.1798549890518188, 'eval_accuracy': 0.8885088919288646, 'eval_runtime': 16.7493, 'eval_samples_per_second': 87.287, 'eval_steps_per_second': 10.926, 'epoch': 36.0}


 92%|█████████▎| 20054/21680 [1:52:30<08:17,  3.27it/s]  

{'loss': 0.001, 'learning_rate': 7.5e-07, 'epoch': 37.0}


                                                       
 92%|█████████▎| 20054/21680 [1:52:46<08:17,  3.27it/s]

{'eval_loss': 1.1924571990966797, 'eval_accuracy': 0.8885088919288646, 'eval_runtime': 16.7398, 'eval_samples_per_second': 87.337, 'eval_steps_per_second': 10.932, 'epoch': 37.0}


 95%|█████████▌| 20596/21680 [1:55:33<05:33,  3.25it/s]  

{'loss': 0.0033, 'learning_rate': 5.000000000000001e-07, 'epoch': 38.0}


                                                       
 95%|█████████▌| 20596/21680 [1:55:49<05:33,  3.25it/s]

{'eval_loss': 1.1868427991867065, 'eval_accuracy': 0.8919288645690835, 'eval_runtime': 16.7471, 'eval_samples_per_second': 87.299, 'eval_steps_per_second': 10.927, 'epoch': 38.0}


 98%|█████████▊| 21138/21680 [1:58:36<02:45,  3.28it/s]  

{'loss': 0.0012, 'learning_rate': 2.5000000000000004e-07, 'epoch': 39.0}


                                                       
 98%|█████████▊| 21138/21680 [1:58:52<02:45,  3.28it/s]

{'eval_loss': 1.1774280071258545, 'eval_accuracy': 0.8926128590971272, 'eval_runtime': 16.7644, 'eval_samples_per_second': 87.209, 'eval_steps_per_second': 10.916, 'epoch': 39.0}


100%|██████████| 21680/21680 [2:01:40<00:00,  3.25it/s]

{'loss': 0.0027, 'learning_rate': 0.0, 'epoch': 40.0}


                                                       
100%|██████████| 21680/21680 [2:01:57<00:00,  2.96it/s]

{'eval_loss': 1.1771156787872314, 'eval_accuracy': 0.893296853625171, 'eval_runtime': 16.7531, 'eval_samples_per_second': 87.267, 'eval_steps_per_second': 10.923, 'epoch': 40.0}
{'train_runtime': 7317.7142, 'train_samples_per_second': 23.701, 'train_steps_per_second': 2.963, 'train_loss': 0.0608820932336509, 'epoch': 40.0}
Execution Time : 7318 seconds





In [17]:
# Evaluate the trained model on the Trainer's eval_dataset; the returned metrics dict is the cell output.
trainer.evaluate()

100%|██████████| 183/183 [00:16<00:00, 11.01it/s]


{'eval_loss': 1.1771156787872314,
 'eval_accuracy': 0.893296853625171,
 'eval_runtime': 16.7214,
 'eval_samples_per_second': 87.433,
 'eval_steps_per_second': 10.944,
 'epoch': 40.0}