In [1]:
import pandas as pd

finetuned_dirname = "40-epoch-distilbert-base-finetuned-twitter15-rnr"

data = pd.read_csv("../../data/processed/twitter15_dataset_with_tvt.csv", lineterminator="\n")
data = data[['tweet_text', 'tvt2', 'label']]
print(data.shape)
data.head()

(1490, 3)


Unnamed: 0,tweet_text,tvt2,label
0,🔥ca kkk grand wizard 🔥 endorses @hillaryclinto...,training,unverified
1,an open letter to trump voters from his top st...,training,unverified
2,america is a nation of second chances —@potus ...,validation,non-rumor
3,"brandon marshall visits and offers advice, sup...",testting,non-rumor
4,rip elly may clampett: so sad to learn #beverl...,validation,true


In [2]:
combined_data = data

In [3]:
import torch

class CustomTextDataset(torch.utils.data.dataset.Dataset):

    def __init__(self, texts, labels):
        self.labels = labels
        self.texts = texts
        self.attention_mask = None
        self.input_ids = None
        self.token_type_ids = None

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {
            "text": self.texts[idx],
            "label": self.labels[idx],
            "attention_mask": self.attention_mask[idx] if self.attention_mask else None,
            "input_ids": self.input_ids[idx] if self.input_ids else None,
#             "token_type_ids": self.token_type_ids[idx] if self.token_type_ids else None
        }
        return sample
    
    def tokenize(self, tokenizer):
        self.attention_mask = []
        self.input_ids = []
        self.token_type_ids = []

        for text in self.texts:
            token = tokenizer(text, padding="max_length", truncation=True)
            
            self.attention_mask.append(token['attention_mask'])
            self.input_ids.append(token['input_ids'])
#             self.token_type_ids.append(token['token_type_ids'])

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
labels = []

labels_str = combined_data['label'].unique().tolist()
for i, d in combined_data.iterrows():
#     lab = labels_str.index(d['label'])
    if d['label'] in ['true', 'false', 'unverified']:
        lab = 0
    else:
        lab = 1
    labels.append(lab)
    
print(len(labels))
labels[:10]

1490


[0, 0, 1, 1, 0, 1, 0, 0, 0, 0]

In [5]:
train_dataset = CustomTextDataset(
    [d['tweet_text'] for i, d in combined_data.iterrows() if d['tvt2'] == 'training'],
    [labels[i] for i, d in combined_data.iterrows() if d['tvt2'] == 'training'])
test_dataset = CustomTextDataset(
    [d['tweet_text'] for i, d in combined_data.iterrows() if d['tvt2'] == 'validation'],
    [labels[i] for i, d in combined_data.iterrows() if d['tvt2'] == 'validation'])
train_dataset[0]

{'text': '🔥ca kkk grand wizard 🔥 endorses @hillaryclinton #neverhillary #trump2016 URL\r',
 'label': 0,
 'attention_mask': None,
 'input_ids': None}

In [6]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")

In [7]:
# inputs = tokenizer(["you're stuck in a timewrap from 2004 though", "summa lumma dumma lumma"], padding="max_length", truncation=True)
# for k,v in inputs.items():
#     print(k)

In [8]:
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

train_dataset.tokenize(tokenizer)
test_dataset.tokenize(tokenizer)

In [9]:
print(len(train_dataset))
print(len(test_dataset))

1024
338


### Fine Tuning

In [10]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-cased",
                                                           output_hidden_states=False,
                                                           num_labels=2)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-cased and are newly initialized: ['pre_classifier.bias', 'classifier.weight', 'classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
from transformers import TrainingArguments

epochs = 40
batch_size = 8
save_steps = (round((len(train_dataset)/batch_size) + 0.5)) * epochs
# save_steps = 1_000_000

training_args = TrainingArguments(
    output_dir=f"../../data/models/{finetuned_dirname}",
    num_train_epochs=epochs,
    save_steps=save_steps,
    logging_steps=300,
    learning_rate=1e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    logging_strategy="epoch"
)

print(f"Save Steps : {save_steps}")

Save Steps : 5120


In [12]:
import numpy as np
from datasets import load_metric

metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

  metric = load_metric("accuracy")


In [13]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

In [14]:
import time

start = time.time()

trainer.train()

print(f"Execution Time : {round(time.time() - start)} seconds")

  2%|▎         | 128/5120 [00:22<13:07,  6.34it/s]

{'loss': 0.5421, 'learning_rate': 9.75e-06, 'epoch': 1.0}


                                                  
  3%|▎         | 129/5120 [00:24<1:05:03,  1.28it/s]

{'eval_loss': 0.49339744448661804, 'eval_accuracy': 0.7337278106508875, 'eval_runtime': 2.074, 'eval_samples_per_second': 162.973, 'eval_steps_per_second': 20.733, 'epoch': 1.0}


  5%|▌         | 256/5120 [00:44<12:48,  6.33it/s]  

{'loss': 0.3447, 'learning_rate': 9.5e-06, 'epoch': 2.0}


                                                  
  5%|▌         | 257/5120 [00:46<1:03:28,  1.28it/s]

{'eval_loss': 0.5233179330825806, 'eval_accuracy': 0.8106508875739645, 'eval_runtime': 2.0763, 'eval_samples_per_second': 162.788, 'eval_steps_per_second': 20.71, 'epoch': 2.0}


  8%|▊         | 384/5120 [01:06<12:27,  6.34it/s]  

{'loss': 0.1786, 'learning_rate': 9.250000000000001e-06, 'epoch': 3.0}


                                                  
  8%|▊         | 385/5120 [01:09<1:02:05,  1.27it/s]

{'eval_loss': 0.5530083775520325, 'eval_accuracy': 0.8402366863905325, 'eval_runtime': 2.0867, 'eval_samples_per_second': 161.981, 'eval_steps_per_second': 20.607, 'epoch': 3.0}


 10%|█         | 512/5120 [01:29<12:11,  6.30it/s]  

{'loss': 0.0927, 'learning_rate': 9e-06, 'epoch': 4.0}


                                                  
 10%|█         | 513/5120 [01:31<1:00:32,  1.27it/s]

{'eval_loss': 0.5839837193489075, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 2.0877, 'eval_samples_per_second': 161.898, 'eval_steps_per_second': 20.596, 'epoch': 4.0}


 12%|█▎        | 640/5120 [01:51<11:57,  6.25it/s]  

{'loss': 0.043, 'learning_rate': 8.750000000000001e-06, 'epoch': 5.0}


                                                  
 13%|█▎        | 641/5120 [01:54<59:00,  1.26it/s]

{'eval_loss': 0.9530224204063416, 'eval_accuracy': 0.8372781065088757, 'eval_runtime': 2.0947, 'eval_samples_per_second': 161.36, 'eval_steps_per_second': 20.528, 'epoch': 5.0}


 15%|█▌        | 768/5120 [02:14<11:33,  6.28it/s]

{'loss': 0.0311, 'learning_rate': 8.5e-06, 'epoch': 6.0}


                                                  
 15%|█▌        | 769/5120 [02:16<57:32,  1.26it/s]

{'eval_loss': 0.741889238357544, 'eval_accuracy': 0.8609467455621301, 'eval_runtime': 2.1087, 'eval_samples_per_second': 160.287, 'eval_steps_per_second': 20.392, 'epoch': 6.0}


 18%|█▊        | 896/5120 [02:36<11:18,  6.22it/s]

{'loss': 0.017, 'learning_rate': 8.25e-06, 'epoch': 7.0}


                                                  
 18%|█▊        | 897/5120 [02:39<55:38,  1.26it/s]

{'eval_loss': 0.7260947823524475, 'eval_accuracy': 0.8698224852071006, 'eval_runtime': 2.0967, 'eval_samples_per_second': 161.206, 'eval_steps_per_second': 20.508, 'epoch': 7.0}


 20%|██        | 1024/5120 [02:59<10:52,  6.27it/s]

{'loss': 0.0124, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.0}


                                                   
 20%|██        | 1025/5120 [03:01<54:02,  1.26it/s]

{'eval_loss': 0.7969847917556763, 'eval_accuracy': 0.8668639053254438, 'eval_runtime': 2.1026, 'eval_samples_per_second': 160.756, 'eval_steps_per_second': 20.451, 'epoch': 8.0}


 22%|██▎       | 1152/5120 [03:22<10:35,  6.25it/s]

{'loss': 0.0068, 'learning_rate': 7.75e-06, 'epoch': 9.0}


                                                   
 23%|██▎       | 1153/5120 [03:24<52:50,  1.25it/s]

{'eval_loss': 1.0106405019760132, 'eval_accuracy': 0.8402366863905325, 'eval_runtime': 2.1194, 'eval_samples_per_second': 159.482, 'eval_steps_per_second': 20.289, 'epoch': 9.0}


 25%|██▌       | 1280/5120 [03:44<10:14,  6.25it/s]

{'loss': 0.0044, 'learning_rate': 7.500000000000001e-06, 'epoch': 10.0}


                                                   
 25%|██▌       | 1281/5120 [03:46<50:47,  1.26it/s]

{'eval_loss': 0.8575365543365479, 'eval_accuracy': 0.8609467455621301, 'eval_runtime': 2.1068, 'eval_samples_per_second': 160.434, 'eval_steps_per_second': 20.41, 'epoch': 10.0}


 28%|██▊       | 1408/5120 [04:07<09:52,  6.26it/s]

{'loss': 0.0056, 'learning_rate': 7.25e-06, 'epoch': 11.0}


                                                   
 28%|██▊       | 1409/5120 [04:09<49:09,  1.26it/s]

{'eval_loss': 0.8961165547370911, 'eval_accuracy': 0.863905325443787, 'eval_runtime': 2.1092, 'eval_samples_per_second': 160.247, 'eval_steps_per_second': 20.386, 'epoch': 11.0}


 30%|███       | 1536/5120 [04:29<09:34,  6.23it/s]

{'loss': 0.0004, 'learning_rate': 7e-06, 'epoch': 12.0}


                                                   
 30%|███       | 1537/5120 [04:32<47:30,  1.26it/s]

{'eval_loss': 1.2592556476593018, 'eval_accuracy': 0.8372781065088757, 'eval_runtime': 2.1093, 'eval_samples_per_second': 160.245, 'eval_steps_per_second': 20.386, 'epoch': 12.0}


 32%|███▎      | 1664/5120 [04:52<09:13,  6.25it/s]

{'loss': 0.0002, 'learning_rate': 6.750000000000001e-06, 'epoch': 13.0}


                                                   
 33%|███▎      | 1665/5120 [04:54<45:53,  1.25it/s]

{'eval_loss': 1.033130168914795, 'eval_accuracy': 0.8579881656804734, 'eval_runtime': 2.11, 'eval_samples_per_second': 160.189, 'eval_steps_per_second': 20.379, 'epoch': 13.0}


 35%|███▌      | 1792/5120 [05:15<08:55,  6.21it/s]

{'loss': 0.0002, 'learning_rate': 6.5000000000000004e-06, 'epoch': 14.0}


                                                   
 35%|███▌      | 1793/5120 [05:17<44:16,  1.25it/s]

{'eval_loss': 1.0917158126831055, 'eval_accuracy': 0.8550295857988166, 'eval_runtime': 2.111, 'eval_samples_per_second': 160.113, 'eval_steps_per_second': 20.369, 'epoch': 14.0}


 38%|███▊      | 1920/5120 [05:37<08:32,  6.24it/s]

{'loss': 0.0001, 'learning_rate': 6.25e-06, 'epoch': 15.0}


                                                   
 38%|███▊      | 1921/5120 [05:40<42:19,  1.26it/s]

{'eval_loss': 1.0676372051239014, 'eval_accuracy': 0.849112426035503, 'eval_runtime': 2.1043, 'eval_samples_per_second': 160.623, 'eval_steps_per_second': 20.434, 'epoch': 15.0}


 40%|████      | 2048/5120 [06:00<08:13,  6.23it/s]

{'loss': 0.0001, 'learning_rate': 6e-06, 'epoch': 16.0}


                                                   
 40%|████      | 2049/5120 [06:02<40:47,  1.25it/s]

{'eval_loss': 1.0891433954238892, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 2.1139, 'eval_samples_per_second': 159.897, 'eval_steps_per_second': 20.342, 'epoch': 16.0}


 42%|████▎     | 2176/5120 [06:23<07:51,  6.24it/s]

{'loss': 0.0001, 'learning_rate': 5.75e-06, 'epoch': 17.0}


                                                   
 43%|████▎     | 2177/5120 [06:25<38:56,  1.26it/s]

{'eval_loss': 1.1178410053253174, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 2.1063, 'eval_samples_per_second': 160.474, 'eval_steps_per_second': 20.415, 'epoch': 17.0}


 45%|████▌     | 2304/5120 [06:45<07:31,  6.24it/s]

{'loss': 0.0001, 'learning_rate': 5.500000000000001e-06, 'epoch': 18.0}


                                                   
 45%|████▌     | 2305/5120 [06:48<37:18,  1.26it/s]

{'eval_loss': 1.1281781196594238, 'eval_accuracy': 0.8550295857988166, 'eval_runtime': 2.1097, 'eval_samples_per_second': 160.21, 'eval_steps_per_second': 20.382, 'epoch': 18.0}


 48%|████▊     | 2432/5120 [07:08<07:11,  6.23it/s]

{'loss': 0.0001, 'learning_rate': 5.2500000000000006e-06, 'epoch': 19.0}


                                                   
 48%|████▊     | 2433/5120 [07:10<35:38,  1.26it/s]

{'eval_loss': 1.1271029710769653, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 2.1085, 'eval_samples_per_second': 160.3, 'eval_steps_per_second': 20.393, 'epoch': 19.0}


 50%|█████     | 2560/5120 [07:31<06:52,  6.21it/s]

{'loss': 0.0001, 'learning_rate': 5e-06, 'epoch': 20.0}


                                                   
 50%|█████     | 2561/5120 [07:33<34:07,  1.25it/s]

{'eval_loss': 1.0843974351882935, 'eval_accuracy': 0.8550295857988166, 'eval_runtime': 2.1234, 'eval_samples_per_second': 159.182, 'eval_steps_per_second': 20.251, 'epoch': 20.0}


 52%|█████▎    | 2688/5120 [07:54<06:32,  6.19it/s]

{'loss': 0.0001, 'learning_rate': 4.75e-06, 'epoch': 21.0}


                                                   
 53%|█████▎    | 2689/5120 [07:56<32:17,  1.25it/s]

{'eval_loss': 1.2012686729431152, 'eval_accuracy': 0.849112426035503, 'eval_runtime': 2.1138, 'eval_samples_per_second': 159.901, 'eval_steps_per_second': 20.342, 'epoch': 21.0}


 55%|█████▌    | 2816/5120 [08:16<06:09,  6.24it/s]

{'loss': 0.0001, 'learning_rate': 4.5e-06, 'epoch': 22.0}


                                                   
 55%|█████▌    | 2817/5120 [08:19<30:26,  1.26it/s]

{'eval_loss': 1.2030264139175415, 'eval_accuracy': 0.849112426035503, 'eval_runtime': 2.1028, 'eval_samples_per_second': 160.742, 'eval_steps_per_second': 20.449, 'epoch': 22.0}


 57%|█████▊    | 2944/5120 [08:39<05:50,  6.20it/s]

{'loss': 0.0001, 'learning_rate': 4.25e-06, 'epoch': 23.0}


                                                   
 58%|█████▊    | 2945/5120 [08:41<28:52,  1.26it/s]

{'eval_loss': 1.203903317451477, 'eval_accuracy': 0.849112426035503, 'eval_runtime': 2.1087, 'eval_samples_per_second': 160.286, 'eval_steps_per_second': 20.391, 'epoch': 23.0}


 60%|██████    | 3072/5120 [09:02<05:28,  6.23it/s]

{'loss': 0.0001, 'learning_rate': 4.000000000000001e-06, 'epoch': 24.0}


                                                   
 60%|██████    | 3073/5120 [09:04<27:07,  1.26it/s]

{'eval_loss': 1.223136067390442, 'eval_accuracy': 0.849112426035503, 'eval_runtime': 2.1054, 'eval_samples_per_second': 160.537, 'eval_steps_per_second': 20.423, 'epoch': 24.0}


 62%|██████▎   | 3200/5120 [09:24<05:07,  6.25it/s]

{'loss': 0.0001, 'learning_rate': 3.7500000000000005e-06, 'epoch': 25.0}


                                                   
 63%|██████▎   | 3201/5120 [09:27<25:26,  1.26it/s]

{'eval_loss': 1.2242908477783203, 'eval_accuracy': 0.849112426035503, 'eval_runtime': 2.1123, 'eval_samples_per_second': 160.018, 'eval_steps_per_second': 20.357, 'epoch': 25.0}


 65%|██████▌   | 3328/5120 [09:47<04:51,  6.14it/s]

{'loss': 0.0, 'learning_rate': 3.5e-06, 'epoch': 26.0}


                                                   
 65%|██████▌   | 3329/5120 [09:50<23:50,  1.25it/s]

{'eval_loss': 1.1987128257751465, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 2.1158, 'eval_samples_per_second': 159.751, 'eval_steps_per_second': 20.323, 'epoch': 26.0}


 68%|██████▊   | 3456/5120 [10:10<04:28,  6.19it/s]

{'loss': 0.004, 'learning_rate': 3.2500000000000002e-06, 'epoch': 27.0}


                                                   
 68%|██████▊   | 3457/5120 [10:12<22:08,  1.25it/s]

{'eval_loss': 1.155392050743103, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 2.1161, 'eval_samples_per_second': 159.725, 'eval_steps_per_second': 20.32, 'epoch': 27.0}


 70%|███████   | 3584/5120 [10:33<04:08,  6.19it/s]

{'loss': 0.0, 'learning_rate': 3e-06, 'epoch': 28.0}


                                                   
 70%|███████   | 3585/5120 [10:35<20:27,  1.25it/s]

{'eval_loss': 1.1897913217544556, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 2.1203, 'eval_samples_per_second': 159.413, 'eval_steps_per_second': 20.28, 'epoch': 28.0}


 72%|███████▎  | 3712/5120 [10:56<03:48,  6.16it/s]

{'loss': 0.0, 'learning_rate': 2.7500000000000004e-06, 'epoch': 29.0}


                                                   
 73%|███████▎  | 3713/5120 [10:58<18:39,  1.26it/s]

{'eval_loss': 1.206804871559143, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 2.104, 'eval_samples_per_second': 160.647, 'eval_steps_per_second': 20.437, 'epoch': 29.0}


 75%|███████▌  | 3840/5120 [11:18<03:25,  6.23it/s]

{'loss': 0.0, 'learning_rate': 2.5e-06, 'epoch': 30.0}


                                                   
 75%|███████▌  | 3841/5120 [11:21<16:57,  1.26it/s]

{'eval_loss': 1.2025309801101685, 'eval_accuracy': 0.8520710059171598, 'eval_runtime': 2.1077, 'eval_samples_per_second': 160.364, 'eval_steps_per_second': 20.401, 'epoch': 30.0}


 78%|███████▊  | 3968/5120 [11:41<03:06,  6.17it/s]

{'loss': 0.0, 'learning_rate': 2.25e-06, 'epoch': 31.0}


                                                   
 78%|███████▊  | 3969/5120 [11:43<15:17,  1.25it/s]

{'eval_loss': 1.1979306936264038, 'eval_accuracy': 0.849112426035503, 'eval_runtime': 2.1134, 'eval_samples_per_second': 159.93, 'eval_steps_per_second': 20.346, 'epoch': 31.0}


 80%|████████  | 4096/5120 [12:04<02:44,  6.22it/s]

{'loss': 0.0, 'learning_rate': 2.0000000000000003e-06, 'epoch': 32.0}


                                                   
 80%|████████  | 4097/5120 [12:06<13:33,  1.26it/s]

{'eval_loss': 1.180645227432251, 'eval_accuracy': 0.8461538461538461, 'eval_runtime': 2.1081, 'eval_samples_per_second': 160.331, 'eval_steps_per_second': 20.397, 'epoch': 32.0}


 82%|████████▎ | 4224/5120 [12:26<02:23,  6.24it/s]

{'loss': 0.0, 'learning_rate': 1.75e-06, 'epoch': 33.0}


                                                   
 83%|████████▎ | 4225/5120 [12:29<11:52,  1.26it/s]

{'eval_loss': 1.181540608406067, 'eval_accuracy': 0.8461538461538461, 'eval_runtime': 2.1111, 'eval_samples_per_second': 160.11, 'eval_steps_per_second': 20.369, 'epoch': 33.0}


 85%|████████▌ | 4352/5120 [12:49<02:03,  6.23it/s]

{'loss': 0.0, 'learning_rate': 1.5e-06, 'epoch': 34.0}


                                                   
 85%|████████▌ | 4353/5120 [12:51<10:09,  1.26it/s]

{'eval_loss': 1.1838288307189941, 'eval_accuracy': 0.8461538461538461, 'eval_runtime': 2.1057, 'eval_samples_per_second': 160.514, 'eval_steps_per_second': 20.42, 'epoch': 34.0}


 88%|████████▊ | 4480/5120 [13:12<01:42,  6.24it/s]

{'loss': 0.0, 'learning_rate': 1.25e-06, 'epoch': 35.0}


                                                   
 88%|████████▊ | 4481/5120 [13:14<08:28,  1.26it/s]

{'eval_loss': 1.1773884296417236, 'eval_accuracy': 0.849112426035503, 'eval_runtime': 2.1115, 'eval_samples_per_second': 160.079, 'eval_steps_per_second': 20.365, 'epoch': 35.0}


 90%|█████████ | 4608/5120 [13:34<01:21,  6.26it/s]

{'loss': 0.0, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


                                                   
 90%|█████████ | 4609/5120 [13:37<06:45,  1.26it/s]

{'eval_loss': 1.19269859790802, 'eval_accuracy': 0.8461538461538461, 'eval_runtime': 2.105, 'eval_samples_per_second': 160.573, 'eval_steps_per_second': 20.428, 'epoch': 36.0}


 92%|█████████▎| 4736/5120 [13:57<01:01,  6.25it/s]

{'loss': 0.0, 'learning_rate': 7.5e-07, 'epoch': 37.0}


                                                   
 93%|█████████▎| 4737/5120 [13:59<05:03,  1.26it/s]

{'eval_loss': 1.1978296041488647, 'eval_accuracy': 0.8461538461538461, 'eval_runtime': 2.1031, 'eval_samples_per_second': 160.716, 'eval_steps_per_second': 20.446, 'epoch': 37.0}


 95%|█████████▌| 4864/5120 [14:20<00:41,  6.17it/s]

{'loss': 0.0, 'learning_rate': 5.000000000000001e-07, 'epoch': 38.0}


                                                   
 95%|█████████▌| 4865/5120 [14:22<03:23,  1.25it/s]

{'eval_loss': 1.196637749671936, 'eval_accuracy': 0.8461538461538461, 'eval_runtime': 2.1056, 'eval_samples_per_second': 160.524, 'eval_steps_per_second': 20.422, 'epoch': 38.0}


 98%|█████████▊| 4992/5120 [14:42<00:20,  6.15it/s]

{'loss': 0.0, 'learning_rate': 2.5000000000000004e-07, 'epoch': 39.0}


                                                   
 98%|█████████▊| 4993/5120 [14:45<01:41,  1.25it/s]

{'eval_loss': 1.1117480993270874, 'eval_accuracy': 0.8668639053254438, 'eval_runtime': 2.122, 'eval_samples_per_second': 159.287, 'eval_steps_per_second': 20.264, 'epoch': 39.0}


100%|██████████| 5120/5120 [15:06<00:00,  6.17it/s]

{'loss': 0.0, 'learning_rate': 0.0, 'epoch': 40.0}


                                                   
100%|██████████| 5120/5120 [15:08<00:00,  5.64it/s]

{'eval_loss': 1.112185001373291, 'eval_accuracy': 0.8668639053254438, 'eval_runtime': 2.1144, 'eval_samples_per_second': 159.855, 'eval_steps_per_second': 20.337, 'epoch': 40.0}
{'train_runtime': 908.4869, 'train_samples_per_second': 45.086, 'train_steps_per_second': 5.636, 'train_loss': 0.032119497603935086, 'epoch': 40.0}
Execution Time : 909 seconds





In [15]:
trainer.evaluate()

  0%|          | 0/43 [00:00<?, ?it/s]

100%|██████████| 43/43 [00:02<00:00, 20.77it/s]


{'eval_loss': 1.112185001373291,
 'eval_accuracy': 0.8668639053254438,
 'eval_runtime': 2.1214,
 'eval_samples_per_second': 159.326,
 'eval_steps_per_second': 20.269,
 'epoch': 40.0}