In [1]:
import pandas as pd

finetuned_dirname = "40-epoch-bert-base-finetuned-phemernr2-tf"

data = pd.read_csv("../../data/processed/phemernr2-tf_dataset.csv", sep=",")
data = data[['tweet_text', 'tvt2', 'label']]
data['tweet_text'] = data['tweet_text'].str.lower()
print(data.shape)
data.head()

(1705, 3)


Unnamed: 0,tweet_text,tvt2,label
0,breaking - a germanwings airbus a320 plane rep...,training,True
1,reports that two of the dead in the #charliehe...,training,True
2,'no survivors' in #germanwings crash says fren...,training,False
3,tragedy mounts as soldier shot this am dies of...,training,True
4,watch the moment gunfire and explosions were h...,training,True


In [2]:
combined_data = data

In [3]:
import torch

class CustomTextDataset(torch.utils.data.dataset.Dataset):

    def __init__(self, texts, labels):
        self.labels = labels
        self.texts = texts
        self.attention_mask = None
        self.input_ids = None
        self.token_type_ids = None

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {
            "text": self.texts[idx],
            "label": self.labels[idx],
            "attention_mask": self.attention_mask[idx] if self.attention_mask else None,
            "input_ids": self.input_ids[idx] if self.input_ids else None,
            "token_type_ids": self.token_type_ids[idx] if self.token_type_ids else None
        }
        return sample
    
    def tokenize(self, tokenizer):
        self.attention_mask = []
        self.input_ids = []
        self.token_type_ids = []

        for text in self.texts:
            token = tokenizer(text, padding="max_length", truncation=True)
            
            self.attention_mask.append(token['attention_mask'])
            self.input_ids.append(token['input_ids'])
            self.token_type_ids.append(token['token_type_ids'])

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
labels = []

labels_str = combined_data['label'].unique().tolist()
for i, d in combined_data.iterrows():
    lab = labels_str.index(d['label'])
    labels.append(lab)
    
print(len(labels))
labels[:10]

1705


[0, 0, 1, 0, 0, 0, 0, 1, 0, 0]

In [5]:
train_dataset = CustomTextDataset(
    [d['tweet_text'] for i, d in combined_data.iterrows() if d['tvt2'] == 'training'],
    [labels[i] for i, d in combined_data.iterrows() if d['tvt2'] == 'training'])
test_dataset = CustomTextDataset(
    [d['tweet_text'] for i, d in combined_data.iterrows() if d['tvt2'] == 'validation'],
    [labels[i] for i, d in combined_data.iterrows() if d['tvt2'] == 'validation'])
train_dataset[0]

{'text': 'breaking - a germanwings airbus a320 plane reportedly crashed in the region of digne (french alps) #flightradar24 - french tv #itele',
 'label': 0,
 'attention_mask': None,
 'input_ids': None,
 'token_type_ids': None}

In [6]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

In [7]:
# inputs = tokenizer(["you're stuck in a timewrap from 2004 though", "summa lumma dumma lumma"], padding="max_length", truncation=True)
# for k,v in inputs.items():
#     print(k)

In [8]:
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

train_dataset.tokenize(tokenizer)
test_dataset.tokenize(tokenizer)

In [9]:
print(len(train_dataset))
print(len(test_dataset))

1176
371


### Fine Tuning

In [10]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased",
                                                           output_hidden_states=False,
                                                           num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
from transformers import TrainingArguments

epochs = 40
batch_size = 8
save_steps = (round((len(train_dataset)/batch_size) + 0.49)) * epochs
# save_steps = 1_000_000

training_args = TrainingArguments(
    output_dir=f"../../data/models/{finetuned_dirname}",
    num_train_epochs=epochs,
    save_steps=save_steps,
    logging_steps=300,
    learning_rate=1e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    logging_strategy="epoch"
)

print(f"Save Steps : {save_steps}")

Save Steps : 5880


In [12]:
import numpy as np
from datasets import load_metric

metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

  metric = load_metric("accuracy")


In [13]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

In [14]:
import time

start = time.time()

trainer.train()

print(f"Execution Time : {round(time.time() - start)} seconds")

  2%|▎         | 147/5880 [00:46<29:01,  3.29it/s]

{'loss': 0.5768, 'learning_rate': 9.75e-06, 'epoch': 1.0}


                                                  
  2%|▎         | 147/5880 [00:50<29:01,  3.29it/s]

{'eval_loss': 0.45760130882263184, 'eval_accuracy': 0.7789757412398922, 'eval_runtime': 4.3609, 'eval_samples_per_second': 85.074, 'eval_steps_per_second': 10.778, 'epoch': 1.0}


  5%|▌         | 294/5880 [01:34<27:57,  3.33it/s]  

{'loss': 0.3681, 'learning_rate': 9.5e-06, 'epoch': 2.0}


                                                  
  5%|▌         | 294/5880 [01:39<27:57,  3.33it/s]

{'eval_loss': 0.42155423760414124, 'eval_accuracy': 0.8113207547169812, 'eval_runtime': 4.3412, 'eval_samples_per_second': 85.46, 'eval_steps_per_second': 10.826, 'epoch': 2.0}


  8%|▊         | 441/5880 [02:23<27:12,  3.33it/s]  

{'loss': 0.2477, 'learning_rate': 9.250000000000001e-06, 'epoch': 3.0}


                                                  
  8%|▊         | 441/5880 [02:27<27:12,  3.33it/s]

{'eval_loss': 0.5289597511291504, 'eval_accuracy': 0.816711590296496, 'eval_runtime': 4.3404, 'eval_samples_per_second': 85.475, 'eval_steps_per_second': 10.828, 'epoch': 3.0}


 10%|█         | 588/5880 [03:12<26:58,  3.27it/s]  

{'loss': 0.1749, 'learning_rate': 9e-06, 'epoch': 4.0}


                                                  
 10%|█         | 588/5880 [03:16<26:58,  3.27it/s]

{'eval_loss': 0.699114203453064, 'eval_accuracy': 0.862533692722372, 'eval_runtime': 4.3304, 'eval_samples_per_second': 85.673, 'eval_steps_per_second': 10.853, 'epoch': 4.0}


 12%|█▎        | 735/5880 [04:00<25:40,  3.34it/s]  

{'loss': 0.1221, 'learning_rate': 8.750000000000001e-06, 'epoch': 5.0}


                                                  
 12%|█▎        | 735/5880 [04:04<25:40,  3.34it/s]

{'eval_loss': 0.813473105430603, 'eval_accuracy': 0.8194070080862533, 'eval_runtime': 4.3278, 'eval_samples_per_second': 85.724, 'eval_steps_per_second': 10.86, 'epoch': 5.0}


 15%|█▌        | 882/5880 [04:49<25:02,  3.33it/s]  

{'loss': 0.0705, 'learning_rate': 8.5e-06, 'epoch': 6.0}


                                                  
 15%|█▌        | 882/5880 [04:53<25:02,  3.33it/s]

{'eval_loss': 0.7974358201026917, 'eval_accuracy': 0.8652291105121294, 'eval_runtime': 4.343, 'eval_samples_per_second': 85.425, 'eval_steps_per_second': 10.822, 'epoch': 6.0}


 18%|█▊        | 1029/5880 [05:40<25:29,  3.17it/s] 

{'loss': 0.0366, 'learning_rate': 8.25e-06, 'epoch': 7.0}


                                                   
 18%|█▊        | 1029/5880 [05:44<25:29,  3.17it/s]

{'eval_loss': 0.8027729392051697, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 4.6529, 'eval_samples_per_second': 79.735, 'eval_steps_per_second': 10.101, 'epoch': 7.0}


 20%|██        | 1176/5880 [06:31<24:50,  3.16it/s]  

{'loss': 0.0135, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.0}


                                                   
 20%|██        | 1176/5880 [06:35<24:50,  3.16it/s]

{'eval_loss': 0.9369971752166748, 'eval_accuracy': 0.8679245283018868, 'eval_runtime': 4.6466, 'eval_samples_per_second': 79.843, 'eval_steps_per_second': 10.115, 'epoch': 8.0}


 22%|██▎       | 1323/5880 [07:22<24:01,  3.16it/s]  

{'loss': 0.0114, 'learning_rate': 7.75e-06, 'epoch': 9.0}


                                                   
 22%|██▎       | 1323/5880 [07:27<24:01,  3.16it/s]

{'eval_loss': 0.9783257842063904, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 4.611, 'eval_samples_per_second': 80.459, 'eval_steps_per_second': 10.193, 'epoch': 9.0}


 25%|██▌       | 1470/5880 [08:13<23:10,  3.17it/s]  

{'loss': 0.0087, 'learning_rate': 7.500000000000001e-06, 'epoch': 10.0}


                                                   
 25%|██▌       | 1470/5880 [08:18<23:10,  3.17it/s]

{'eval_loss': 0.9691874384880066, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 4.6156, 'eval_samples_per_second': 80.379, 'eval_steps_per_second': 10.183, 'epoch': 10.0}


 28%|██▊       | 1617/5880 [09:04<22:25,  3.17it/s]  

{'loss': 0.011, 'learning_rate': 7.25e-06, 'epoch': 11.0}


                                                   
 28%|██▊       | 1617/5880 [09:09<22:25,  3.17it/s]

{'eval_loss': 1.0631400346755981, 'eval_accuracy': 0.8652291105121294, 'eval_runtime': 4.6151, 'eval_samples_per_second': 80.389, 'eval_steps_per_second': 10.184, 'epoch': 11.0}


 30%|███       | 1764/5880 [09:55<21:39,  3.17it/s]  

{'loss': 0.0001, 'learning_rate': 7e-06, 'epoch': 12.0}


                                                   
 30%|███       | 1764/5880 [10:00<21:39,  3.17it/s]

{'eval_loss': 1.014373540878296, 'eval_accuracy': 0.8733153638814016, 'eval_runtime': 4.6101, 'eval_samples_per_second': 80.475, 'eval_steps_per_second': 10.195, 'epoch': 12.0}


 32%|███▎      | 1911/5880 [10:47<20:56,  3.16it/s]  

{'loss': 0.0003, 'learning_rate': 6.750000000000001e-06, 'epoch': 13.0}


                                                   
 32%|███▎      | 1911/5880 [10:51<20:56,  3.16it/s]

{'eval_loss': 1.0178828239440918, 'eval_accuracy': 0.876010781671159, 'eval_runtime': 4.6063, 'eval_samples_per_second': 80.541, 'eval_steps_per_second': 10.203, 'epoch': 13.0}


 35%|███▌      | 2058/5880 [11:38<20:06,  3.17it/s]  

{'loss': 0.007, 'learning_rate': 6.5000000000000004e-06, 'epoch': 14.0}


                                                   
 35%|███▌      | 2058/5880 [11:42<20:06,  3.17it/s]

{'eval_loss': 1.1287373304367065, 'eval_accuracy': 0.8679245283018868, 'eval_runtime': 4.6142, 'eval_samples_per_second': 80.404, 'eval_steps_per_second': 10.186, 'epoch': 14.0}


 38%|███▊      | 2205/5880 [12:29<19:19,  3.17it/s]  

{'loss': 0.0028, 'learning_rate': 6.25e-06, 'epoch': 15.0}


                                                   
 38%|███▊      | 2205/5880 [12:33<19:19,  3.17it/s]

{'eval_loss': 1.0344796180725098, 'eval_accuracy': 0.8814016172506739, 'eval_runtime': 4.6064, 'eval_samples_per_second': 80.539, 'eval_steps_per_second': 10.203, 'epoch': 15.0}


 40%|████      | 2352/5880 [13:20<18:34,  3.17it/s]  

{'loss': 0.003, 'learning_rate': 6e-06, 'epoch': 16.0}


                                                   
 40%|████      | 2352/5880 [13:25<18:34,  3.17it/s]

{'eval_loss': 1.0907526016235352, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 4.6187, 'eval_samples_per_second': 80.326, 'eval_steps_per_second': 10.176, 'epoch': 16.0}


 42%|████▎     | 2499/5880 [14:11<17:48,  3.17it/s]  

{'loss': 0.0, 'learning_rate': 5.75e-06, 'epoch': 17.0}


                                                   
 42%|████▎     | 2499/5880 [14:16<17:48,  3.17it/s]

{'eval_loss': 1.3165457248687744, 'eval_accuracy': 0.8679245283018868, 'eval_runtime': 4.6115, 'eval_samples_per_second': 80.45, 'eval_steps_per_second': 10.192, 'epoch': 17.0}


 45%|████▌     | 2646/5880 [15:02<17:09,  3.14it/s]  

{'loss': 0.0049, 'learning_rate': 5.500000000000001e-06, 'epoch': 18.0}


                                                   
 45%|████▌     | 2646/5880 [15:07<17:09,  3.14it/s]

{'eval_loss': 1.1120622158050537, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 4.6099, 'eval_samples_per_second': 80.479, 'eval_steps_per_second': 10.195, 'epoch': 18.0}


 48%|████▊     | 2793/5880 [15:53<16:14,  3.17it/s]  

{'loss': 0.0013, 'learning_rate': 5.2500000000000006e-06, 'epoch': 19.0}


                                                   
 48%|████▊     | 2793/5880 [15:58<16:14,  3.17it/s]

{'eval_loss': 1.0664403438568115, 'eval_accuracy': 0.8840970350404312, 'eval_runtime': 4.6143, 'eval_samples_per_second': 80.403, 'eval_steps_per_second': 10.186, 'epoch': 19.0}


 50%|█████     | 2940/5880 [16:45<15:31,  3.16it/s]  

{'loss': 0.0003, 'learning_rate': 5e-06, 'epoch': 20.0}


                                                   
 50%|█████     | 2940/5880 [16:49<15:31,  3.16it/s]

{'eval_loss': 1.1984245777130127, 'eval_accuracy': 0.876010781671159, 'eval_runtime': 4.6135, 'eval_samples_per_second': 80.416, 'eval_steps_per_second': 10.187, 'epoch': 20.0}


 52%|█████▎    | 3087/5880 [17:36<14:44,  3.16it/s]  

{'loss': 0.0059, 'learning_rate': 4.75e-06, 'epoch': 21.0}


                                                   
 52%|█████▎    | 3087/5880 [17:40<14:44,  3.16it/s]

{'eval_loss': 1.3215742111206055, 'eval_accuracy': 0.8490566037735849, 'eval_runtime': 4.6125, 'eval_samples_per_second': 80.433, 'eval_steps_per_second': 10.19, 'epoch': 21.0}


 55%|█████▌    | 3234/5880 [18:27<13:54,  3.17it/s]  

{'loss': 0.0, 'learning_rate': 4.5e-06, 'epoch': 22.0}


                                                   
 55%|█████▌    | 3234/5880 [18:32<13:54,  3.17it/s]

{'eval_loss': 1.212669014930725, 'eval_accuracy': 0.8598382749326146, 'eval_runtime': 4.6023, 'eval_samples_per_second': 80.612, 'eval_steps_per_second': 10.212, 'epoch': 22.0}


 57%|█████▊    | 3381/5880 [19:18<13:09,  3.16it/s]  

{'loss': 0.0066, 'learning_rate': 4.25e-06, 'epoch': 23.0}


                                                   
 57%|█████▊    | 3381/5880 [19:23<13:09,  3.16it/s]

{'eval_loss': 1.1305339336395264, 'eval_accuracy': 0.8814016172506739, 'eval_runtime': 4.6374, 'eval_samples_per_second': 80.001, 'eval_steps_per_second': 10.135, 'epoch': 23.0}


 60%|██████    | 3528/5880 [20:10<12:27,  3.15it/s]  

{'loss': 0.0003, 'learning_rate': 4.000000000000001e-06, 'epoch': 24.0}


                                                   
 60%|██████    | 3528/5880 [20:14<12:27,  3.15it/s]

{'eval_loss': 1.1299155950546265, 'eval_accuracy': 0.876010781671159, 'eval_runtime': 4.607, 'eval_samples_per_second': 80.529, 'eval_steps_per_second': 10.202, 'epoch': 24.0}


 62%|██████▎   | 3675/5880 [21:01<11:37,  3.16it/s]  

{'loss': 0.0014, 'learning_rate': 3.7500000000000005e-06, 'epoch': 25.0}


                                                   
 62%|██████▎   | 3675/5880 [21:05<11:37,  3.16it/s]

{'eval_loss': 1.1767364740371704, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 4.6113, 'eval_samples_per_second': 80.454, 'eval_steps_per_second': 10.192, 'epoch': 25.0}


 65%|██████▌   | 3822/5880 [21:52<10:49,  3.17it/s]  

{'loss': 0.0008, 'learning_rate': 3.5e-06, 'epoch': 26.0}


                                                   
 65%|██████▌   | 3822/5880 [21:57<10:49,  3.17it/s]

{'eval_loss': 1.1180511713027954, 'eval_accuracy': 0.8867924528301887, 'eval_runtime': 4.6139, 'eval_samples_per_second': 80.409, 'eval_steps_per_second': 10.187, 'epoch': 26.0}


 68%|██████▊   | 3969/5880 [22:43<10:05,  3.16it/s]

{'loss': 0.0062, 'learning_rate': 3.2500000000000002e-06, 'epoch': 27.0}


                                                   
 68%|██████▊   | 3969/5880 [22:48<10:05,  3.16it/s]

{'eval_loss': 1.2457107305526733, 'eval_accuracy': 0.8733153638814016, 'eval_runtime': 4.6055, 'eval_samples_per_second': 80.555, 'eval_steps_per_second': 10.205, 'epoch': 27.0}


 70%|███████   | 4116/5880 [23:34<09:16,  3.17it/s]

{'loss': 0.0004, 'learning_rate': 3e-06, 'epoch': 28.0}


                                                   
 70%|███████   | 4116/5880 [23:39<09:16,  3.17it/s]

{'eval_loss': 1.2357717752456665, 'eval_accuracy': 0.8814016172506739, 'eval_runtime': 4.6103, 'eval_samples_per_second': 80.471, 'eval_steps_per_second': 10.194, 'epoch': 28.0}


 72%|███████▎  | 4263/5880 [24:25<08:30,  3.17it/s]

{'loss': 0.0099, 'learning_rate': 2.7500000000000004e-06, 'epoch': 29.0}


                                                   
 72%|███████▎  | 4263/5880 [24:30<08:30,  3.17it/s]

{'eval_loss': 1.231260895729065, 'eval_accuracy': 0.8706199460916442, 'eval_runtime': 4.6074, 'eval_samples_per_second': 80.523, 'eval_steps_per_second': 10.201, 'epoch': 29.0}


 75%|███████▌  | 4410/5880 [25:16<07:44,  3.17it/s]

{'loss': 0.0036, 'learning_rate': 2.5e-06, 'epoch': 30.0}


                                                   
 75%|███████▌  | 4410/5880 [25:21<07:44,  3.17it/s]

{'eval_loss': 1.2813546657562256, 'eval_accuracy': 0.8679245283018868, 'eval_runtime': 4.6125, 'eval_samples_per_second': 80.433, 'eval_steps_per_second': 10.19, 'epoch': 30.0}


 78%|███████▊  | 4557/5880 [26:08<06:58,  3.16it/s]

{'loss': 0.0, 'learning_rate': 2.25e-06, 'epoch': 31.0}


                                                   
 78%|███████▊  | 4557/5880 [26:12<06:58,  3.16it/s]

{'eval_loss': 1.1252379417419434, 'eval_accuracy': 0.8814016172506739, 'eval_runtime': 4.6121, 'eval_samples_per_second': 80.44, 'eval_steps_per_second': 10.191, 'epoch': 31.0}


 80%|████████  | 4704/5880 [26:59<06:13,  3.15it/s]

{'loss': 0.0072, 'learning_rate': 2.0000000000000003e-06, 'epoch': 32.0}


                                                   
 80%|████████  | 4704/5880 [27:03<06:13,  3.15it/s]

{'eval_loss': 1.272270679473877, 'eval_accuracy': 0.8733153638814016, 'eval_runtime': 4.6066, 'eval_samples_per_second': 80.536, 'eval_steps_per_second': 10.203, 'epoch': 32.0}


 82%|████████▎ | 4851/5880 [27:50<05:27,  3.14it/s]

{'loss': 0.0, 'learning_rate': 1.75e-06, 'epoch': 33.0}


                                                   
 82%|████████▎ | 4851/5880 [27:55<05:27,  3.14it/s]

{'eval_loss': 1.2116618156433105, 'eval_accuracy': 0.8840970350404312, 'eval_runtime': 4.6135, 'eval_samples_per_second': 80.415, 'eval_steps_per_second': 10.187, 'epoch': 33.0}


 85%|████████▌ | 4998/5880 [28:41<04:39,  3.16it/s]

{'loss': 0.0, 'learning_rate': 1.5e-06, 'epoch': 34.0}


                                                   
 85%|████████▌ | 4998/5880 [28:46<04:39,  3.16it/s]

{'eval_loss': 1.211334466934204, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 4.6145, 'eval_samples_per_second': 80.4, 'eval_steps_per_second': 10.185, 'epoch': 34.0}


 88%|████████▊ | 5145/5880 [29:32<03:51,  3.17it/s]

{'loss': 0.0, 'learning_rate': 1.25e-06, 'epoch': 35.0}


                                                   
 88%|████████▊ | 5145/5880 [29:37<03:51,  3.17it/s]

{'eval_loss': 1.2126216888427734, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 4.6038, 'eval_samples_per_second': 80.585, 'eval_steps_per_second': 10.209, 'epoch': 35.0}


 90%|█████████ | 5292/5880 [30:24<03:06,  3.16it/s]

{'loss': 0.0, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


                                                   
 90%|█████████ | 5292/5880 [30:28<03:06,  3.16it/s]

{'eval_loss': 1.2290724515914917, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 4.6148, 'eval_samples_per_second': 80.394, 'eval_steps_per_second': 10.185, 'epoch': 36.0}


 92%|█████████▎| 5439/5880 [31:15<02:20,  3.14it/s]

{'loss': 0.0, 'learning_rate': 7.5e-07, 'epoch': 37.0}


                                                   
 92%|█████████▎| 5439/5880 [31:19<02:20,  3.14it/s]

{'eval_loss': 1.2231942415237427, 'eval_accuracy': 0.8814016172506739, 'eval_runtime': 4.6105, 'eval_samples_per_second': 80.469, 'eval_steps_per_second': 10.194, 'epoch': 37.0}


 95%|█████████▌| 5586/5880 [32:06<01:33,  3.16it/s]

{'loss': 0.0, 'learning_rate': 5.000000000000001e-07, 'epoch': 38.0}


                                                   
 95%|█████████▌| 5586/5880 [32:10<01:33,  3.16it/s]

{'eval_loss': 1.2393299341201782, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 4.6091, 'eval_samples_per_second': 80.493, 'eval_steps_per_second': 10.197, 'epoch': 38.0}


 98%|█████████▊| 5733/5880 [32:57<00:46,  3.16it/s]

{'loss': 0.0, 'learning_rate': 2.5000000000000004e-07, 'epoch': 39.0}


                                                   
 98%|█████████▊| 5733/5880 [33:02<00:46,  3.16it/s]

{'eval_loss': 1.2388825416564941, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 4.6038, 'eval_samples_per_second': 80.586, 'eval_steps_per_second': 10.209, 'epoch': 39.0}


100%|██████████| 5880/5880 [33:49<00:00,  3.17it/s]

{'loss': 0.0, 'learning_rate': 0.0, 'epoch': 40.0}


                                                   
100%|██████████| 5880/5880 [33:54<00:00,  2.89it/s]

{'eval_loss': 1.2390159368515015, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 4.6023, 'eval_samples_per_second': 80.612, 'eval_steps_per_second': 10.212, 'epoch': 40.0}
{'train_runtime': 2034.3127, 'train_samples_per_second': 23.123, 'train_steps_per_second': 2.89, 'train_loss': 0.0425915867978387, 'epoch': 40.0}
Execution Time : 2034 seconds





In [15]:
trainer.evaluate()

100%|██████████| 47/47 [00:04<00:00, 10.47it/s]


{'eval_loss': 1.2390159368515015,
 'eval_accuracy': 0.8787061994609164,
 'eval_runtime': 4.5901,
 'eval_samples_per_second': 80.827,
 'eval_steps_per_second': 10.24,
 'epoch': 40.0}