In [1]:
import pandas as pd

# Sub-directory name used later for the fine-tuned model's output folder.
finetuned_dirname = "40-epoch-bert-base-finetuned-twitter15-rnr"

# Read the processed Twitter15 CSV and keep only the tweet text, the
# train/validation/test assignment column and the label column.
# Only "\n" is treated as a row terminator.
data = pd.read_csv(
    "../../data/processed/twitter15_dataset_with_tvt.csv",
    lineterminator="\n",
)[['tweet_text', 'tvt2', 'label']]
print(data.shape)
data.head()

(1490, 3)


Unnamed: 0,tweet_text,tvt2,label
0,🔥ca kkk grand wizard 🔥 endorses @hillaryclinto...,training,unverified
1,an open letter to trump voters from his top st...,training,unverified
2,america is a nation of second chances —@potus ...,validation,non-rumor
3,"brandon marshall visits and offers advice, sup...",testting,non-rumor
4,rip elly may clampett: so sad to learn #beverl...,validation,true


In [2]:
# Alias (not a copy): `combined_data` and `data` refer to the same DataFrame object.
combined_data = data

In [3]:
import torch

class CustomTextDataset(torch.utils.data.dataset.Dataset):
    """Map-style dataset pairing raw texts with their labels.

    Until `tokenize` has been called, the encoding fields of every sample
    (`attention_mask`, `input_ids`, `token_type_ids`) are `None`.
    """

    def __init__(self, texts, labels):
        """Store the parallel `texts` and `labels` sequences.

        Args:
            texts: indexable sequence of input strings.
            labels: indexable sequence of targets, one per text.

        Raises:
            ValueError: if `texts` and `labels` differ in length.
        """
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length "
                f"(got {len(texts)} and {len(labels)})"
            )
        self.labels = labels
        self.texts = texts
        # Filled in by `tokenize`; None means "not tokenized yet".
        self.attention_mask = None
        self.input_ids = None
        self.token_type_ids = None

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample `idx` as a dict.

        Keys: "text", "label", "attention_mask", "input_ids",
        "token_type_ids". An encoding value is `None` when the dataset has
        not been tokenized or the tokenizer did not produce that field.
        """
        def pick(column):
            # Explicit None check so that containers without a plain boolean
            # value (e.g. arrays) are handled the same way as lists.
            return column[idx] if column is not None else None

        return {
            "text": self.texts[idx],
            "label": self.labels[idx],
            "attention_mask": pick(self.attention_mask),
            "input_ids": pick(self.input_ids),
            "token_type_ids": pick(self.token_type_ids),
        }

    def tokenize(self, tokenizer):
        """Encode every text with `tokenizer` and cache the results.

        `tokenizer` is called once per text with `padding="max_length"` and
        `truncation=True` and must return a mapping containing
        "attention_mask" and "input_ids". "token_type_ids" is optional:
        when the tokenizer does not return it, `None` is stored for that
        sample instead of raising `KeyError`.

        Calling this again discards the previous encodings.
        """
        self.attention_mask = []
        self.input_ids = []
        self.token_type_ids = []

        for text in self.texts:
            encoded = tokenizer(text, padding="max_length", truncation=True)

            self.attention_mask.append(encoded['attention_mask'])
            self.input_ids.append(encoded['input_ids'])
            # Not every tokenizer emits segment ids; tolerate their absence.
            self.token_type_ids.append(encoded.get('token_type_ids'))

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Distinct label strings in order of first appearance. Not used by the binary
# mapping below; kept because it is a cell-level name other cells may read.
labels_str = combined_data['label'].unique().tolist()

# Binary target: 0 when the label is 'true', 'false' or 'unverified',
# 1 for any other label value. Computed in one vectorized pass instead of a
# Python-level loop over rows.
is_class_zero = combined_data['label'].isin(['true', 'false', 'unverified'])
labels = (~is_class_zero).astype(int).tolist()

print(len(labels))
labels[:10]

1490


[0, 0, 1, 1, 0, 1, 0, 0, 0, 0]

In [5]:
def _build_split(split_name):
    """Build a CustomTextDataset from the rows whose 'tvt2' equals `split_name`.

    Rows are matched to `labels` by position rather than by DataFrame index
    label, so the pairing stays correct even if `combined_data` does not
    carry a default 0..n-1 index (e.g. after filtering or shuffling).
    """
    in_split = (combined_data['tvt2'] == split_name).tolist()
    all_texts = combined_data['tweet_text'].tolist()
    split_texts = [text for text, keep in zip(all_texts, in_split) if keep]
    split_labels = [lab for lab, keep in zip(labels, in_split) if keep]
    return CustomTextDataset(split_texts, split_labels)


train_dataset = _build_split('training')
# NOTE: despite its name, `test_dataset` is built from the 'validation' rows.
test_dataset = _build_split('validation')
train_dataset[0]

{'text': '🔥ca kkk grand wizard 🔥 endorses @hillaryclinton #neverhillary #trump2016 URL\r',
 'label': 0,
 'attention_mask': None,
 'input_ids': None,
 'token_type_ids': None}

In [6]:
from transformers import AutoTokenizer

# Load the tokenizer for the "bert-base-cased" checkpoint.
# NOTE(review): the sample tweets displayed above are all lowercase while this
# checkpoint is case-sensitive — confirm that the cased variant is intended.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

In [7]:
# inputs = tokenizer(["you're stuck in a timewrap from 2004 though", "summa lumma dumma lumma"], padding="max_length", truncation=True)
# for k,v in inputs.items():
#     print(k)

In [8]:
def tokenize_function(examples):
    """Tokenize `examples["text"]` with the notebook-level `tokenizer`.

    The text is truncated and padded to the tokenizer's maximum length.
    """
    raw_text = examples["text"]
    return tokenizer(raw_text, truncation=True, padding="max_length")

# Populate the cached encodings of both splits in place.
for _split in (train_dataset, test_dataset):
    _split.tokenize(tokenizer)

In [9]:
# Sample counts: training split first, then the evaluation split.
print(len(train_dataset), len(test_dataset), sep="\n")

1024
338


### Fine Tuning

In [10]:
from transformers import AutoModelForSequenceClassification

# Pre-trained encoder plus a new two-class classification head.
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-cased",
    num_labels=2,
    output_hidden_states=False,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
import math

from transformers import TrainingArguments

epochs = 40
batch_size = 8
# Number of batches per epoch is ceil(N / batch_size). `math.ceil` is used
# instead of `round(x + 0.5)`: Python 3 rounds halves to the nearest even
# integer, so `round(129.0 + 0.5)` is 130, one step too many whenever the
# exact quotient is an odd integer.
steps_per_epoch = math.ceil(len(train_dataset) / batch_size)
# One save interval spans the whole run (steps per epoch * epochs).
# NOTE(review): presumably so that only the final checkpoint is written — confirm.
save_steps = steps_per_epoch * epochs
# save_steps = 1_000_000

training_args = TrainingArguments(
    output_dir=f"../../data/models/{finetuned_dirname}",
    num_train_epochs=epochs,
    save_steps=save_steps,
    # NOTE(review): likely has no effect while logging_strategy="epoch" — confirm.
    logging_steps=300,
    learning_rate=1e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    logging_strategy="epoch"
)

print(f"Save Steps : {save_steps}")

Save Steps : 5120


In [12]:
import numpy as np
from datasets import load_metric

# Load the "accuracy" metric object.
# NOTE(review): this line triggered a warning when the cell was run (see the
# cell output); `load_metric` appears to be deprecated in newer `datasets`
# releases — confirm against the installed version.
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Compute classification accuracy from an evaluation prediction pair.

    Accuracy is calculated directly with numpy, so the function does not
    depend on a separately loaded metric object.

    Args:
        eval_pred: a `(logits, labels)` pair; `logits` has the class scores
            on its last axis and `labels` holds the reference class ids.

    Returns:
        dict: `{"accuracy": <fraction of predictions equal to labels>}`.

    Raises:
        ValueError: if the predicted classes and the labels differ in shape.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    references = np.asarray(labels)
    if predictions.shape != references.shape:
        # Guard against silent broadcasting in the comparison below.
        raise ValueError(
            f"predictions shape {predictions.shape} does not match "
            f"labels shape {references.shape}"
        )
    return {"accuracy": float(np.mean(predictions == references))}

  metric = load_metric("accuracy")


In [13]:
from transformers import Trainer

# Bundle the model, hyper-parameters, both datasets and the metric callback.
trainer = Trainer(
    compute_metrics=compute_metrics,
    eval_dataset=test_dataset,
    train_dataset=train_dataset,
    args=training_args,
    model=model,
)

In [14]:
import time

# Use a monotonic, high-resolution clock for the elapsed-time measurement;
# `time.time()` can jump if the system clock is adjusted during the run.
start = time.perf_counter()

trainer.train()

print(f"Execution Time : {round(time.perf_counter() - start)} seconds")

  2%|▎         | 128/5120 [00:42<26:08,  3.18it/s]

{'loss': 0.5559, 'learning_rate': 9.75e-06, 'epoch': 1.0}


                                                  
  2%|▎         | 128/5120 [00:46<26:08,  3.18it/s]

{'eval_loss': 0.5195600390434265, 'eval_accuracy': 0.7337278106508875, 'eval_runtime': 4.1378, 'eval_samples_per_second': 81.686, 'eval_steps_per_second': 10.392, 'epoch': 1.0}


  5%|▌         | 256/5120 [01:26<25:27,  3.18it/s]  

{'loss': 0.4129, 'learning_rate': 9.5e-06, 'epoch': 2.0}


                                                  
  5%|▌         | 256/5120 [01:30<25:27,  3.18it/s]

{'eval_loss': 0.6117736101150513, 'eval_accuracy': 0.772189349112426, 'eval_runtime': 4.1093, 'eval_samples_per_second': 82.253, 'eval_steps_per_second': 10.464, 'epoch': 2.0}


  8%|▊         | 384/5120 [02:10<24:53,  3.17it/s]  

{'loss': 0.2297, 'learning_rate': 9.250000000000001e-06, 'epoch': 3.0}


                                                  
  8%|▊         | 384/5120 [02:14<24:53,  3.17it/s]

{'eval_loss': 0.7054131627082825, 'eval_accuracy': 0.8076923076923077, 'eval_runtime': 4.1228, 'eval_samples_per_second': 81.983, 'eval_steps_per_second': 10.43, 'epoch': 3.0}


 10%|█         | 512/5120 [02:55<24:15,  3.17it/s]  

{'loss': 0.1296, 'learning_rate': 9e-06, 'epoch': 4.0}


                                                  
 10%|█         | 512/5120 [02:59<24:15,  3.17it/s]

{'eval_loss': 0.7658370137214661, 'eval_accuracy': 0.8254437869822485, 'eval_runtime': 4.1356, 'eval_samples_per_second': 81.729, 'eval_steps_per_second': 10.397, 'epoch': 4.0}


 12%|█▎        | 640/5120 [03:39<23:36,  3.16it/s]  

{'loss': 0.0689, 'learning_rate': 8.750000000000001e-06, 'epoch': 5.0}


                                                  
 12%|█▎        | 640/5120 [03:44<23:36,  3.16it/s]

{'eval_loss': 0.8319809436798096, 'eval_accuracy': 0.8431952662721893, 'eval_runtime': 4.1525, 'eval_samples_per_second': 81.397, 'eval_steps_per_second': 10.355, 'epoch': 5.0}


 15%|█▌        | 768/5120 [04:24<23:01,  3.15it/s]  

{'loss': 0.0379, 'learning_rate': 8.5e-06, 'epoch': 6.0}


                                                  
 15%|█▌        | 768/5120 [04:28<23:01,  3.15it/s]

{'eval_loss': 0.9195674061775208, 'eval_accuracy': 0.8579881656804734, 'eval_runtime': 4.1668, 'eval_samples_per_second': 81.118, 'eval_steps_per_second': 10.32, 'epoch': 6.0}


 18%|█▊        | 896/5120 [05:09<22:13,  3.17it/s]  

{'loss': 0.0256, 'learning_rate': 8.25e-06, 'epoch': 7.0}


                                                  
 18%|█▊        | 896/5120 [05:13<22:13,  3.17it/s]

{'eval_loss': 1.0093482732772827, 'eval_accuracy': 0.8431952662721893, 'eval_runtime': 4.1693, 'eval_samples_per_second': 81.068, 'eval_steps_per_second': 10.313, 'epoch': 7.0}


 20%|██        | 1024/5120 [05:53<21:38,  3.15it/s] 

{'loss': 0.0228, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.0}


                                                   
 20%|██        | 1024/5120 [05:58<21:38,  3.15it/s]

{'eval_loss': 0.9252911806106567, 'eval_accuracy': 0.8550295857988166, 'eval_runtime': 4.157, 'eval_samples_per_second': 81.308, 'eval_steps_per_second': 10.344, 'epoch': 8.0}


 22%|██▎       | 1152/5120 [06:38<20:53,  3.16it/s]  

{'loss': 0.0149, 'learning_rate': 7.75e-06, 'epoch': 9.0}


                                                   
 22%|██▎       | 1152/5120 [06:42<20:53,  3.16it/s]

{'eval_loss': 1.1990423202514648, 'eval_accuracy': 0.8313609467455622, 'eval_runtime': 4.1452, 'eval_samples_per_second': 81.541, 'eval_steps_per_second': 10.374, 'epoch': 9.0}


 25%|██▌       | 1280/5120 [07:23<20:12,  3.17it/s]  

{'loss': 0.0139, 'learning_rate': 7.500000000000001e-06, 'epoch': 10.0}


                                                   
 25%|██▌       | 1280/5120 [07:27<20:12,  3.17it/s]

{'eval_loss': 1.0894993543624878, 'eval_accuracy': 0.8461538461538461, 'eval_runtime': 4.1478, 'eval_samples_per_second': 81.489, 'eval_steps_per_second': 10.367, 'epoch': 10.0}


 28%|██▊       | 1408/5120 [08:07<19:33,  3.16it/s]  

{'loss': 0.0054, 'learning_rate': 7.25e-06, 'epoch': 11.0}


                                                   
 28%|██▊       | 1408/5120 [08:11<19:33,  3.16it/s]

{'eval_loss': 0.9916428923606873, 'eval_accuracy': 0.8461538461538461, 'eval_runtime': 4.1533, 'eval_samples_per_second': 81.381, 'eval_steps_per_second': 10.353, 'epoch': 11.0}


 30%|███       | 1536/5120 [08:52<18:50,  3.17it/s]  

{'loss': 0.0049, 'learning_rate': 7e-06, 'epoch': 12.0}


                                                   
 30%|███       | 1536/5120 [08:56<18:50,  3.17it/s]

{'eval_loss': 1.1301589012145996, 'eval_accuracy': 0.8431952662721893, 'eval_runtime': 4.1629, 'eval_samples_per_second': 81.194, 'eval_steps_per_second': 10.329, 'epoch': 12.0}


 32%|███▎      | 1664/5120 [09:37<18:20,  3.14it/s]  

{'loss': 0.0005, 'learning_rate': 6.750000000000001e-06, 'epoch': 13.0}


                                                   
 32%|███▎      | 1664/5120 [09:41<18:20,  3.14it/s]

{'eval_loss': 1.0716240406036377, 'eval_accuracy': 0.8550295857988166, 'eval_runtime': 4.1689, 'eval_samples_per_second': 81.077, 'eval_steps_per_second': 10.315, 'epoch': 13.0}


 35%|███▌      | 1792/5120 [10:21<17:31,  3.16it/s]  

{'loss': 0.0003, 'learning_rate': 6.5000000000000004e-06, 'epoch': 14.0}


                                                   
 35%|███▌      | 1792/5120 [10:25<17:31,  3.16it/s]

{'eval_loss': 1.1184834241867065, 'eval_accuracy': 0.849112426035503, 'eval_runtime': 4.1663, 'eval_samples_per_second': 81.126, 'eval_steps_per_second': 10.321, 'epoch': 14.0}


 38%|███▊      | 1920/5120 [11:06<16:50,  3.17it/s]  

{'loss': 0.0003, 'learning_rate': 6.25e-06, 'epoch': 15.0}


                                                   
 38%|███▊      | 1920/5120 [11:10<16:50,  3.17it/s]

{'eval_loss': 1.1240882873535156, 'eval_accuracy': 0.8550295857988166, 'eval_runtime': 4.1675, 'eval_samples_per_second': 81.103, 'eval_steps_per_second': 10.318, 'epoch': 15.0}


 40%|████      | 2048/5120 [11:51<16:11,  3.16it/s]  

{'loss': 0.0002, 'learning_rate': 6e-06, 'epoch': 16.0}


                                                   
 40%|████      | 2048/5120 [11:55<16:11,  3.16it/s]

{'eval_loss': 1.0627765655517578, 'eval_accuracy': 0.8609467455621301, 'eval_runtime': 4.1515, 'eval_samples_per_second': 81.415, 'eval_steps_per_second': 10.358, 'epoch': 16.0}


 42%|████▎     | 2176/5120 [12:35<15:30,  3.16it/s]  

{'loss': 0.0001, 'learning_rate': 5.75e-06, 'epoch': 17.0}


                                                   
 42%|████▎     | 2176/5120 [12:39<15:30,  3.16it/s]

{'eval_loss': 1.1230745315551758, 'eval_accuracy': 0.8550295857988166, 'eval_runtime': 4.1577, 'eval_samples_per_second': 81.295, 'eval_steps_per_second': 10.342, 'epoch': 17.0}


 45%|████▌     | 2304/5120 [13:20<14:49,  3.16it/s]  

{'loss': 0.0113, 'learning_rate': 5.500000000000001e-06, 'epoch': 18.0}


                                                   
 45%|████▌     | 2304/5120 [13:24<14:49,  3.16it/s]

{'eval_loss': 1.2181907892227173, 'eval_accuracy': 0.8402366863905325, 'eval_runtime': 4.1566, 'eval_samples_per_second': 81.317, 'eval_steps_per_second': 10.345, 'epoch': 18.0}


 48%|████▊     | 2432/5120 [14:05<14:08,  3.17it/s]  

{'loss': 0.0001, 'learning_rate': 5.2500000000000006e-06, 'epoch': 19.0}


                                                   
 48%|████▊     | 2432/5120 [14:09<14:08,  3.17it/s]

{'eval_loss': 1.2539376020431519, 'eval_accuracy': 0.8402366863905325, 'eval_runtime': 4.1456, 'eval_samples_per_second': 81.531, 'eval_steps_per_second': 10.372, 'epoch': 19.0}


 50%|█████     | 2560/5120 [14:49<13:28,  3.16it/s]  

{'loss': 0.0001, 'learning_rate': 5e-06, 'epoch': 20.0}


                                                   
 50%|█████     | 2560/5120 [14:53<13:28,  3.16it/s]

{'eval_loss': 1.2552906274795532, 'eval_accuracy': 0.8431952662721893, 'eval_runtime': 4.1508, 'eval_samples_per_second': 81.429, 'eval_steps_per_second': 10.359, 'epoch': 20.0}


 52%|█████▎    | 2688/5120 [15:34<12:49,  3.16it/s]  

{'loss': 0.0001, 'learning_rate': 4.75e-06, 'epoch': 21.0}


                                                   
 52%|█████▎    | 2688/5120 [15:38<12:49,  3.16it/s]

{'eval_loss': 1.2394037246704102, 'eval_accuracy': 0.849112426035503, 'eval_runtime': 4.17, 'eval_samples_per_second': 81.056, 'eval_steps_per_second': 10.312, 'epoch': 21.0}


 55%|█████▌    | 2816/5120 [16:19<12:11,  3.15it/s]  

{'loss': 0.0, 'learning_rate': 4.5e-06, 'epoch': 22.0}


                                                   
 55%|█████▌    | 2816/5120 [16:23<12:11,  3.15it/s]

{'eval_loss': 1.231818675994873, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 4.1669, 'eval_samples_per_second': 81.116, 'eval_steps_per_second': 10.32, 'epoch': 22.0}


 57%|█████▊    | 2944/5120 [17:03<11:27,  3.17it/s]  

{'loss': 0.0, 'learning_rate': 4.25e-06, 'epoch': 23.0}


                                                   
 57%|█████▊    | 2944/5120 [17:07<11:27,  3.17it/s]

{'eval_loss': 1.3041226863861084, 'eval_accuracy': 0.8431952662721893, 'eval_runtime': 4.1553, 'eval_samples_per_second': 81.342, 'eval_steps_per_second': 10.348, 'epoch': 23.0}


 60%|██████    | 3072/5120 [17:48<10:48,  3.16it/s]

{'loss': 0.0, 'learning_rate': 4.000000000000001e-06, 'epoch': 24.0}


                                                   
 60%|██████    | 3072/5120 [17:52<10:48,  3.16it/s]

{'eval_loss': 1.3354992866516113, 'eval_accuracy': 0.8372781065088757, 'eval_runtime': 4.1597, 'eval_samples_per_second': 81.257, 'eval_steps_per_second': 10.337, 'epoch': 24.0}


 62%|██████▎   | 3200/5120 [18:32<10:07,  3.16it/s]

{'loss': 0.0, 'learning_rate': 3.7500000000000005e-06, 'epoch': 25.0}


                                                   
 62%|██████▎   | 3200/5120 [18:37<10:07,  3.16it/s]

{'eval_loss': 1.269469976425171, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 4.1501, 'eval_samples_per_second': 81.443, 'eval_steps_per_second': 10.361, 'epoch': 25.0}


 65%|██████▌   | 3328/5120 [19:17<09:28,  3.15it/s]

{'loss': 0.0, 'learning_rate': 3.5e-06, 'epoch': 26.0}


                                                   
 65%|██████▌   | 3328/5120 [19:21<09:28,  3.15it/s]

{'eval_loss': 1.2610201835632324, 'eval_accuracy': 0.8550295857988166, 'eval_runtime': 4.1545, 'eval_samples_per_second': 81.357, 'eval_steps_per_second': 10.35, 'epoch': 26.0}


 68%|██████▊   | 3456/5120 [20:02<08:45,  3.17it/s]

{'loss': 0.0, 'learning_rate': 3.2500000000000002e-06, 'epoch': 27.0}


                                                   
 68%|██████▊   | 3456/5120 [20:06<08:45,  3.17it/s]

{'eval_loss': 1.278537631034851, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 4.1647, 'eval_samples_per_second': 81.158, 'eval_steps_per_second': 10.325, 'epoch': 27.0}


 70%|███████   | 3584/5120 [20:46<08:04,  3.17it/s]

{'loss': 0.0005, 'learning_rate': 3e-06, 'epoch': 28.0}


                                                   
 70%|███████   | 3584/5120 [20:51<08:04,  3.17it/s]

{'eval_loss': 1.2489559650421143, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 4.1649, 'eval_samples_per_second': 81.155, 'eval_steps_per_second': 10.324, 'epoch': 28.0}


 72%|███████▎  | 3712/5120 [21:31<07:24,  3.17it/s]

{'loss': 0.0, 'learning_rate': 2.7500000000000004e-06, 'epoch': 29.0}


                                                   
 72%|███████▎  | 3712/5120 [21:35<07:24,  3.17it/s]

{'eval_loss': 1.2697210311889648, 'eval_accuracy': 0.8579881656804734, 'eval_runtime': 4.1666, 'eval_samples_per_second': 81.122, 'eval_steps_per_second': 10.32, 'epoch': 29.0}


 75%|███████▌  | 3840/5120 [22:16<06:44,  3.16it/s]

{'loss': 0.0, 'learning_rate': 2.5e-06, 'epoch': 30.0}


                                                   
 75%|███████▌  | 3840/5120 [22:20<06:44,  3.16it/s]

{'eval_loss': 1.2580984830856323, 'eval_accuracy': 0.8579881656804734, 'eval_runtime': 4.1701, 'eval_samples_per_second': 81.053, 'eval_steps_per_second': 10.312, 'epoch': 30.0}


 78%|███████▊  | 3968/5120 [23:00<06:02,  3.18it/s]

{'loss': 0.0, 'learning_rate': 2.25e-06, 'epoch': 31.0}


                                                   
 78%|███████▊  | 3968/5120 [23:04<06:02,  3.18it/s]

{'eval_loss': 1.2703007459640503, 'eval_accuracy': 0.8579881656804734, 'eval_runtime': 4.171, 'eval_samples_per_second': 81.035, 'eval_steps_per_second': 10.309, 'epoch': 31.0}


 80%|████████  | 4096/5120 [23:45<05:23,  3.17it/s]

{'loss': 0.0, 'learning_rate': 2.0000000000000003e-06, 'epoch': 32.0}


                                                   
 80%|████████  | 4096/5120 [23:49<05:23,  3.17it/s]

{'eval_loss': 1.2544406652450562, 'eval_accuracy': 0.8550295857988166, 'eval_runtime': 4.1648, 'eval_samples_per_second': 81.156, 'eval_steps_per_second': 10.325, 'epoch': 32.0}


 82%|████████▎ | 4224/5120 [24:30<04:42,  3.17it/s]

{'loss': 0.0, 'learning_rate': 1.75e-06, 'epoch': 33.0}


                                                   
 82%|████████▎ | 4224/5120 [24:34<04:42,  3.17it/s]

{'eval_loss': 1.2512383460998535, 'eval_accuracy': 0.8550295857988166, 'eval_runtime': 4.1625, 'eval_samples_per_second': 81.201, 'eval_steps_per_second': 10.33, 'epoch': 33.0}


 85%|████████▌ | 4352/5120 [25:14<04:02,  3.17it/s]

{'loss': 0.0, 'learning_rate': 1.5e-06, 'epoch': 34.0}


                                                   
 85%|████████▌ | 4352/5120 [25:18<04:02,  3.17it/s]

{'eval_loss': 1.2731902599334717, 'eval_accuracy': 0.8579881656804734, 'eval_runtime': 4.1634, 'eval_samples_per_second': 81.183, 'eval_steps_per_second': 10.328, 'epoch': 34.0}


 88%|████████▊ | 4480/5120 [25:59<03:22,  3.16it/s]

{'loss': 0.0002, 'learning_rate': 1.25e-06, 'epoch': 35.0}


                                                   
 88%|████████▊ | 4480/5120 [26:03<03:22,  3.16it/s]

{'eval_loss': 1.2047386169433594, 'eval_accuracy': 0.8609467455621301, 'eval_runtime': 4.169, 'eval_samples_per_second': 81.074, 'eval_steps_per_second': 10.314, 'epoch': 35.0}


 90%|█████████ | 4608/5120 [26:43<02:41,  3.17it/s]

{'loss': 0.0, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


                                                   
 90%|█████████ | 4608/5120 [26:48<02:41,  3.17it/s]

{'eval_loss': 1.2396539449691772, 'eval_accuracy': 0.8579881656804734, 'eval_runtime': 4.1674, 'eval_samples_per_second': 81.105, 'eval_steps_per_second': 10.318, 'epoch': 36.0}


 92%|█████████▎| 4736/5120 [27:28<02:01,  3.17it/s]

{'loss': 0.0, 'learning_rate': 7.5e-07, 'epoch': 37.0}


                                                   
 92%|█████████▎| 4736/5120 [27:32<02:01,  3.17it/s]

{'eval_loss': 1.251322865486145, 'eval_accuracy': 0.8609467455621301, 'eval_runtime': 4.174, 'eval_samples_per_second': 80.977, 'eval_steps_per_second': 10.302, 'epoch': 37.0}


 95%|█████████▌| 4864/5120 [28:13<01:21,  3.15it/s]

{'loss': 0.0, 'learning_rate': 5.000000000000001e-07, 'epoch': 38.0}


                                                   
 95%|█████████▌| 4864/5120 [28:17<01:21,  3.15it/s]

{'eval_loss': 1.2964636087417603, 'eval_accuracy': 0.8668639053254438, 'eval_runtime': 4.1667, 'eval_samples_per_second': 81.119, 'eval_steps_per_second': 10.32, 'epoch': 38.0}


 98%|█████████▊| 4992/5120 [28:58<00:40,  3.16it/s]

{'loss': 0.0001, 'learning_rate': 2.5000000000000004e-07, 'epoch': 39.0}


                                                   
 98%|█████████▊| 4992/5120 [29:02<00:40,  3.16it/s]

{'eval_loss': 1.3593791723251343, 'eval_accuracy': 0.849112426035503, 'eval_runtime': 4.1684, 'eval_samples_per_second': 81.086, 'eval_steps_per_second': 10.316, 'epoch': 39.0}


100%|██████████| 5120/5120 [29:43<00:00,  3.16it/s]

{'loss': 0.0, 'learning_rate': 0.0, 'epoch': 40.0}


                                                   
100%|██████████| 5120/5120 [29:47<00:00,  2.86it/s]

{'eval_loss': 1.3891386985778809, 'eval_accuracy': 0.849112426035503, 'eval_runtime': 4.1611, 'eval_samples_per_second': 81.228, 'eval_steps_per_second': 10.334, 'epoch': 40.0}
{'train_runtime': 1787.9838, 'train_samples_per_second': 22.908, 'train_steps_per_second': 2.864, 'train_loss': 0.03841363186561466, 'epoch': 40.0}
Execution Time : 1788 seconds





In [15]:
# Evaluate on the `eval_dataset` given to the Trainer, i.e. `test_dataset`,
# which was built from the rows whose 'tvt2' value is 'validation'.
trainer.evaluate()

100%|██████████| 43/43 [00:04<00:00, 10.62it/s]


{'eval_loss': 1.3891386985778809,
 'eval_accuracy': 0.849112426035503,
 'eval_runtime': 4.1505,
 'eval_samples_per_second': 81.435,
 'eval_steps_per_second': 10.36,
 'epoch': 40.0}