In [1]:
import pandas as pd

# Directory name (under ../../data/models/) that the training cell passes to
# TrainingArguments as the output location.
finetuned_dirname = "40-epoch-distilbert-base-finetuned-twitter15-tf"

# Read the processed dataset, splitting rows on "\n" only, and keep just the
# tweet text, the split marker and the label column.
data = pd.read_csv(
    "../../data/processed/twitter15-tf_dataset.csv",
    lineterminator="\n",
)[["tweet_text", "tvt2", "label"]]
print(data.shape)
data.head()

(742, 3)


Unnamed: 0,tweet_text,tvt2,label
0,rip elly may clampett: so sad to learn #beverl...,training,True
1,just in: missing afghan soldiers found trying ...,training,True
2,#riphulkhogan my heart is ripping like your sh...,training,False
3,a chick-fil-a manager allegedly banned this hi...,validation,False
4,islamic tribunal using sharia law in texas has...,validation,False


In [2]:
# Alias only (no copy): `combined_data` and `data` refer to the same DataFrame object.
combined_data = data

In [3]:
import torch

class CustomTextDataset(torch.utils.data.dataset.Dataset):
    """Map-style dataset that pairs raw texts with labels and, after
    :meth:`tokenize` has been called, with per-sample tokenizer outputs.

    Attributes:
        texts: Indexable collection of input strings.
        labels: Indexable collection of labels, aligned with ``texts``.
        attention_mask: ``None`` until :meth:`tokenize` runs, then one
            attention mask per text.
        input_ids: ``None`` until :meth:`tokenize` runs, then one token-id
            sequence per text.
        token_type_ids: ``None`` until :meth:`tokenize` runs, then an empty
            list; it is never filled and never returned by ``__getitem__``.
    """

    def __init__(self, texts, labels):
        """Store the texts and labels; tokenized fields start out unset.

        Args:
            texts: Indexable collection of input strings.
            labels: Indexable collection of labels with the same length.
        """
        self.labels = labels
        self.texts = texts
        self.attention_mask = None
        self.input_ids = None
        self.token_type_ids = None

    def __len__(self):
        """Return the number of samples (the length of ``labels``)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a dict.

        Keys are ``"text"``, ``"label"``, ``"attention_mask"`` and
        ``"input_ids"``. The last two are ``None`` while the dataset has not
        been tokenized.
        """
        # Compare against None explicitly instead of relying on truthiness:
        # the truth value of a multi-element tensor/array is ambiguous and
        # raises, so `if self.input_ids` breaks as soon as the tokenized
        # fields are stored as tensors rather than plain lists.
        attention_mask = None
        if self.attention_mask is not None:
            attention_mask = self.attention_mask[idx]

        input_ids = None
        if self.input_ids is not None:
            input_ids = self.input_ids[idx]

        return {
            "text": self.texts[idx],
            "label": self.labels[idx],
            "attention_mask": attention_mask,
            "input_ids": input_ids,
        }

    def tokenize(self, tokenizer):
        """Tokenize every text and store the resulting model inputs.

        Args:
            tokenizer: Callable invoked as
                ``tokenizer(text, padding="max_length", truncation=True)``
                that returns a mapping with ``'attention_mask'`` and
                ``'input_ids'`` entries.

        Returns:
            None. ``attention_mask`` and ``input_ids`` are replaced with
            freshly built lists; ``token_type_ids`` is reset to an empty list.
        """
        # Collect into locals first so that a tokenizer failure part-way
        # through does not leave the dataset with half-filled fields.
        attention_mask = []
        input_ids = []

        for text in self.texts:
            encoded = tokenizer(text, padding="max_length", truncation=True)
            attention_mask.append(encoded['attention_mask'])
            input_ids.append(encoded['input_ids'])

        self.attention_mask = attention_mask
        self.input_ids = input_ids
        # Intentionally left empty: token type ids are not collected.
        self.token_type_ids = []

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Distinct label values in order of first appearance; a value's position in
# this list is used as its integer class id.
labels_str = combined_data['label'].unique().tolist()

# One integer class id per row, in row order.
labels = [labels_str.index(value) for value in combined_data['label']]

print(len(labels))
labels[:10]

742


[0, 0, 1, 1, 1, 1, 1, 1, 0, 1]

In [5]:
def _build_split_dataset(frame, label_ids, split):
    """Build a ``CustomTextDataset`` from the rows of ``frame`` whose ``tvt2`` equals ``split``.

    Texts and labels are paired by row *position*, so the result does not
    depend on ``frame`` having a default 0..n-1 index. (Indexing ``label_ids``
    with the index label yielded by ``iterrows()`` is only correct for such an
    index.)

    Args:
        frame: DataFrame with ``tweet_text`` and ``tvt2`` columns.
        label_ids: Sequence of labels aligned with ``frame`` by row position.
        split: Value of the ``tvt2`` column to select.

    Returns:
        CustomTextDataset holding the selected texts and their labels.

    Raises:
        ValueError: If ``label_ids`` and ``frame`` differ in length.
    """
    if len(label_ids) != len(frame):
        raise ValueError(
            f"expected one label per row: {len(label_ids)} labels for {len(frame)} rows"
        )
    rows = zip(frame['tweet_text'], frame['tvt2'], label_ids)
    selected = [(text, label) for text, row_split, label in rows if row_split == split]
    return CustomTextDataset(
        [text for text, _ in selected],
        [label for _, label in selected],
    )


train_dataset = _build_split_dataset(combined_data, labels, 'training')
# Note: despite its name, `test_dataset` holds the rows marked 'validation'.
test_dataset = _build_split_dataset(combined_data, labels, 'validation')
train_dataset[0]

{'text': 'rip elly may clampett: so sad to learn #beverlyhillbillies star donna douglas has passed away. URL\r',
 'label': 0,
 'attention_mask': None,
 'input_ids': None}

In [6]:
from transformers import AutoTokenizer

# Tokenizer for the "distilbert-base-cased" checkpoint (the same checkpoint name
# is used when the classification model is loaded below).
# NOTE(review): the tweets shown in the data preview look lowercased while this
# checkpoint is case-sensitive — confirm this pairing is intended.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")

In [7]:
# inputs = tokenizer(["you're stuck in a timewrap from 2004 though", "summa lumma dumma lumma"], padding="max_length", truncation=True)
# for k,v in inputs.items():
#     print(k)

In [8]:
def tokenize_function(examples):
    """Tokenize ``examples["text"]`` with the notebook-level ``tokenizer``.

    Uses ``padding="max_length"`` and ``truncation=True`` and returns whatever
    the tokenizer call returns.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, padding="max_length", truncation=True)
    return encoded


# Fill in `input_ids` / `attention_mask` on both splits in place.
train_dataset.tokenize(tokenizer)
test_dataset.tokenize(tokenizer)

In [9]:
# Sample counts of the training split and of the evaluation split, one per line.
print(len(train_dataset), len(test_dataset), sep="\n")

497
152


### Fine-Tuning

In [10]:
from transformers import AutoModelForSequenceClassification

# Sequence-classification model on top of the "distilbert-base-cased" checkpoint
# with a two-class output; hidden states are not returned.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-cased",
    num_labels=2,
    output_hidden_states=False,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-cased and are newly initialized: ['pre_classifier.bias', 'classifier.weight', 'classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
import math

from transformers import TrainingArguments

epochs = 40
batch_size = 8
# Total number of steps for the whole run: batches per epoch times epochs, so
# that a step-based checkpoint lands on the final step.
# `math.ceil` replaces the previous `round(x + 0.5)`: Python rounds halves to
# the nearest even integer, so for an exact odd quotient (e.g. 63.0 + 0.5) that
# expression returned one step too many, pushing `save_steps` past the end of
# training.
# NOTE(review): assumes one batch per step on a single device (no gradient
# accumulation) — confirm if the setup changes.
save_steps = math.ceil(len(train_dataset) / batch_size) * epochs
# save_steps = 1_000_000

training_args = TrainingArguments(
    output_dir=f"../../data/models/{finetuned_dirname}",
    num_train_epochs=epochs,
    save_steps=save_steps,
    # NOTE(review): `logging_steps` presumably has no effect while
    # `logging_strategy="epoch"` is set below — confirm against the
    # TrainingArguments documentation.
    logging_steps=300,
    learning_rate=1e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    logging_strategy="epoch"
)

print(f"Save Steps : {save_steps}")

Save Steps : 2520


In [12]:
import numpy as np
from datasets import load_metric

# Accuracy metric object used by `compute_metrics`.
# NOTE(review): `load_metric` is reported as deprecated by recent `datasets`
# releases in favour of the `evaluate` package — confirm against the
# installed version.
metric = load_metric("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy for an evaluation pass.

    Args:
        eval_pred: Two-element iterable ``(logits, labels)``.

    Returns:
        The result of ``metric.compute`` for the arg-max class of each row of
        ``logits`` against ``labels``.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    result = metric.compute(predictions=predicted_classes, references=references)
    return result

  metric = load_metric("accuracy")


In [13]:
from transformers import Trainer

# Wire the model, hyper-parameters, both splits and the metric callback together.
# `test_dataset` (built from the rows marked 'validation') is the evaluation set.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

In [14]:
import time

# Measure elapsed time with a monotonic, high-resolution clock.
# `time.time()` follows the system wall clock, which can be adjusted while
# training runs and would then distort the reported duration.
start = time.perf_counter()

trainer.train()

print(f"Execution Time : {round(time.perf_counter() - start)} seconds")

  2%|▎         | 63/2520 [00:11<06:14,  6.57it/s] 

{'loss': 0.6777, 'learning_rate': 9.75e-06, 'epoch': 1.0}


                                                 
  3%|▎         | 64/2520 [00:12<13:51,  2.95it/s]

{'eval_loss': 0.6519913077354431, 'eval_accuracy': 0.5789473684210527, 'eval_runtime': 0.9163, 'eval_samples_per_second': 165.887, 'eval_steps_per_second': 20.736, 'epoch': 1.0}


  5%|▌         | 126/2520 [00:21<05:59,  6.66it/s]

{'loss': 0.4839, 'learning_rate': 9.5e-06, 'epoch': 2.0}


                                                  
  5%|▌         | 127/2520 [00:22<13:22,  2.98it/s]

{'eval_loss': 0.41383618116378784, 'eval_accuracy': 0.7894736842105263, 'eval_runtime': 0.9006, 'eval_samples_per_second': 168.781, 'eval_steps_per_second': 21.098, 'epoch': 2.0}


  8%|▊         | 189/2520 [00:31<06:01,  6.45it/s]

{'loss': 0.1754, 'learning_rate': 9.250000000000001e-06, 'epoch': 3.0}


                                                  
  8%|▊         | 190/2520 [00:32<13:11,  2.94it/s]

{'eval_loss': 0.39371606707572937, 'eval_accuracy': 0.8552631578947368, 'eval_runtime': 0.9081, 'eval_samples_per_second': 167.385, 'eval_steps_per_second': 20.923, 'epoch': 3.0}


 10%|█         | 252/2520 [00:42<05:47,  6.52it/s]

{'loss': 0.0517, 'learning_rate': 9e-06, 'epoch': 4.0}


                                                  
 10%|█         | 253/2520 [00:43<12:50,  2.94it/s]

{'eval_loss': 0.4432889521121979, 'eval_accuracy': 0.8618421052631579, 'eval_runtime': 0.9146, 'eval_samples_per_second': 166.187, 'eval_steps_per_second': 20.773, 'epoch': 4.0}


 12%|█▎        | 315/2520 [00:53<06:05,  6.04it/s]

{'loss': 0.0141, 'learning_rate': 8.750000000000001e-06, 'epoch': 5.0}


                                                  
 13%|█▎        | 316/2520 [00:54<13:06,  2.80it/s]

{'eval_loss': 0.5105398297309875, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 0.9266, 'eval_samples_per_second': 164.034, 'eval_steps_per_second': 20.504, 'epoch': 5.0}


 15%|█▌        | 378/2520 [01:04<05:53,  6.06it/s]

{'loss': 0.0145, 'learning_rate': 8.5e-06, 'epoch': 6.0}


                                                  
 15%|█▌        | 379/2520 [01:05<12:47,  2.79it/s]

{'eval_loss': 0.5870457887649536, 'eval_accuracy': 0.875, 'eval_runtime': 0.9326, 'eval_samples_per_second': 162.992, 'eval_steps_per_second': 20.374, 'epoch': 6.0}


 18%|█▊        | 441/2520 [01:15<05:42,  6.07it/s]

{'loss': 0.0065, 'learning_rate': 8.25e-06, 'epoch': 7.0}


                                                  
 18%|█▊        | 442/2520 [01:16<12:19,  2.81it/s]

{'eval_loss': 0.5633563995361328, 'eval_accuracy': 0.8552631578947368, 'eval_runtime': 0.9251, 'eval_samples_per_second': 164.312, 'eval_steps_per_second': 20.539, 'epoch': 7.0}


 20%|██        | 504/2520 [01:27<05:35,  6.01it/s]

{'loss': 0.0011, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.0}


                                                  
 20%|██        | 505/2520 [01:28<12:01,  2.79it/s]

{'eval_loss': 0.6589027643203735, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 0.9253, 'eval_samples_per_second': 164.27, 'eval_steps_per_second': 20.534, 'epoch': 8.0}


 22%|██▎       | 567/2520 [01:38<05:22,  6.06it/s]

{'loss': 0.0008, 'learning_rate': 7.75e-06, 'epoch': 9.0}


                                                  
 23%|██▎       | 568/2520 [01:39<11:38,  2.80it/s]

{'eval_loss': 0.7613883018493652, 'eval_accuracy': 0.8881578947368421, 'eval_runtime': 0.9302, 'eval_samples_per_second': 163.406, 'eval_steps_per_second': 20.426, 'epoch': 9.0}


 25%|██▌       | 630/2520 [01:49<05:19,  5.91it/s]

{'loss': 0.005, 'learning_rate': 7.500000000000001e-06, 'epoch': 10.0}


                                                  
 25%|██▌       | 631/2520 [01:50<11:32,  2.73it/s]

{'eval_loss': 0.6671153903007507, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 0.9548, 'eval_samples_per_second': 159.194, 'eval_steps_per_second': 19.899, 'epoch': 10.0}


 28%|██▊       | 693/2520 [02:00<05:02,  6.05it/s]

{'loss': 0.0005, 'learning_rate': 7.25e-06, 'epoch': 11.0}


                                                  
 28%|██▊       | 694/2520 [02:01<10:55,  2.79it/s]

{'eval_loss': 0.5686626434326172, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 0.9321, 'eval_samples_per_second': 163.073, 'eval_steps_per_second': 20.384, 'epoch': 11.0}


 30%|███       | 756/2520 [02:11<04:52,  6.03it/s]

{'loss': 0.0004, 'learning_rate': 7e-06, 'epoch': 12.0}


                                                  
 30%|███       | 757/2520 [02:13<10:34,  2.78it/s]

{'eval_loss': 0.6002759337425232, 'eval_accuracy': 0.875, 'eval_runtime': 0.9318, 'eval_samples_per_second': 163.119, 'eval_steps_per_second': 20.39, 'epoch': 12.0}


 32%|███▎      | 819/2520 [02:23<04:40,  6.06it/s]

{'loss': 0.0004, 'learning_rate': 6.750000000000001e-06, 'epoch': 13.0}


                                                  
 33%|███▎      | 820/2520 [02:24<10:20,  2.74it/s]

{'eval_loss': 0.6194301247596741, 'eval_accuracy': 0.875, 'eval_runtime': 0.956, 'eval_samples_per_second': 158.988, 'eval_steps_per_second': 19.873, 'epoch': 13.0}


 35%|███▌      | 882/2520 [02:34<04:16,  6.39it/s]

{'loss': 0.0003, 'learning_rate': 6.5000000000000004e-06, 'epoch': 14.0}


                                                  
 35%|███▌      | 883/2520 [02:36<09:35,  2.84it/s]

{'eval_loss': 0.6374211311340332, 'eval_accuracy': 0.875, 'eval_runtime': 0.9504, 'eval_samples_per_second': 159.941, 'eval_steps_per_second': 19.993, 'epoch': 14.0}


 38%|███▊      | 945/2520 [02:45<04:08,  6.34it/s]

{'loss': 0.0003, 'learning_rate': 6.25e-06, 'epoch': 15.0}


                                                  
 38%|███▊      | 946/2520 [02:46<09:11,  2.85it/s]

{'eval_loss': 0.6533142924308777, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 0.9432, 'eval_samples_per_second': 161.155, 'eval_steps_per_second': 20.144, 'epoch': 15.0}


 40%|████      | 1008/2520 [02:56<03:51,  6.52it/s]

{'loss': 0.0003, 'learning_rate': 6e-06, 'epoch': 16.0}


                                                   
 40%|████      | 1009/2520 [02:57<08:31,  2.96it/s]

{'eval_loss': 0.6628312468528748, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 0.9072, 'eval_samples_per_second': 167.545, 'eval_steps_per_second': 20.943, 'epoch': 16.0}


 42%|████▎     | 1071/2520 [03:06<03:42,  6.51it/s]

{'loss': 0.0002, 'learning_rate': 5.75e-06, 'epoch': 17.0}


                                                   
 43%|████▎     | 1072/2520 [03:08<08:10,  2.95it/s]

{'eval_loss': 0.7100796699523926, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 0.9117, 'eval_samples_per_second': 166.728, 'eval_steps_per_second': 20.841, 'epoch': 17.0}


 45%|████▌     | 1134/2520 [03:17<03:32,  6.53it/s]

{'loss': 0.0002, 'learning_rate': 5.500000000000001e-06, 'epoch': 18.0}


                                                   
 45%|████▌     | 1135/2520 [03:18<07:48,  2.95it/s]

{'eval_loss': 0.6934766173362732, 'eval_accuracy': 0.875, 'eval_runtime': 0.9078, 'eval_samples_per_second': 167.441, 'eval_steps_per_second': 20.93, 'epoch': 18.0}


 48%|████▊     | 1197/2520 [03:27<03:23,  6.50it/s]

{'loss': 0.0002, 'learning_rate': 5.2500000000000006e-06, 'epoch': 19.0}


                                                   
 48%|████▊     | 1198/2520 [03:28<07:28,  2.95it/s]

{'eval_loss': 0.6864320635795593, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 0.9089, 'eval_samples_per_second': 167.237, 'eval_steps_per_second': 20.905, 'epoch': 19.0}


 50%|█████     | 1260/2520 [03:38<03:13,  6.53it/s]

{'loss': 0.0002, 'learning_rate': 5e-06, 'epoch': 20.0}


                                                   
 50%|█████     | 1261/2520 [03:39<07:05,  2.96it/s]

{'eval_loss': 0.6968185305595398, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 0.9062, 'eval_samples_per_second': 167.734, 'eval_steps_per_second': 20.967, 'epoch': 20.0}


 52%|█████▎    | 1323/2520 [03:48<03:02,  6.54it/s]

{'loss': 0.0002, 'learning_rate': 4.75e-06, 'epoch': 21.0}


                                                   
 53%|█████▎    | 1324/2520 [03:49<06:48,  2.93it/s]

{'eval_loss': 0.7116252183914185, 'eval_accuracy': 0.875, 'eval_runtime': 0.9185, 'eval_samples_per_second': 165.489, 'eval_steps_per_second': 20.686, 'epoch': 21.0}


 55%|█████▌    | 1386/2520 [03:59<02:55,  6.46it/s]

{'loss': 0.0002, 'learning_rate': 4.5e-06, 'epoch': 22.0}


                                                   
 55%|█████▌    | 1387/2520 [04:00<06:25,  2.94it/s]

{'eval_loss': 0.7194314002990723, 'eval_accuracy': 0.875, 'eval_runtime': 0.9072, 'eval_samples_per_second': 167.553, 'eval_steps_per_second': 20.944, 'epoch': 22.0}


 57%|█████▊    | 1449/2520 [04:09<02:42,  6.61it/s]

{'loss': 0.0001, 'learning_rate': 4.25e-06, 'epoch': 23.0}


                                                   
 58%|█████▊    | 1450/2520 [04:10<05:58,  2.99it/s]

{'eval_loss': 0.7240556478500366, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 0.899, 'eval_samples_per_second': 169.071, 'eval_steps_per_second': 21.134, 'epoch': 23.0}


 60%|██████    | 1512/2520 [04:20<02:36,  6.43it/s]

{'loss': 0.0002, 'learning_rate': 4.000000000000001e-06, 'epoch': 24.0}


                                                   
 60%|██████    | 1513/2520 [04:21<05:42,  2.94it/s]

{'eval_loss': 0.6845701336860657, 'eval_accuracy': 0.8881578947368421, 'eval_runtime': 0.9096, 'eval_samples_per_second': 167.104, 'eval_steps_per_second': 20.888, 'epoch': 24.0}


 62%|██████▎   | 1575/2520 [04:30<02:23,  6.58it/s]

{'loss': 0.0001, 'learning_rate': 3.7500000000000005e-06, 'epoch': 25.0}


                                                   
 63%|██████▎   | 1576/2520 [04:31<05:18,  2.96it/s]

{'eval_loss': 0.7105960249900818, 'eval_accuracy': 0.875, 'eval_runtime': 0.9045, 'eval_samples_per_second': 168.051, 'eval_steps_per_second': 21.006, 'epoch': 25.0}


 65%|██████▌   | 1638/2520 [04:41<02:14,  6.56it/s]

{'loss': 0.0001, 'learning_rate': 3.5e-06, 'epoch': 26.0}


                                                   
 65%|██████▌   | 1639/2520 [04:42<04:56,  2.97it/s]

{'eval_loss': 0.7334119081497192, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 0.8992, 'eval_samples_per_second': 169.038, 'eval_steps_per_second': 21.13, 'epoch': 26.0}


 68%|██████▊   | 1701/2520 [04:51<02:04,  6.58it/s]

{'loss': 0.0001, 'learning_rate': 3.2500000000000002e-06, 'epoch': 27.0}


                                                   
 68%|██████▊   | 1702/2520 [04:52<04:36,  2.96it/s]

{'eval_loss': 0.742455244064331, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 0.8996, 'eval_samples_per_second': 168.965, 'eval_steps_per_second': 21.121, 'epoch': 27.0}


 70%|███████   | 1764/2520 [05:02<01:56,  6.47it/s]

{'loss': 0.0001, 'learning_rate': 3e-06, 'epoch': 28.0}


                                                   
 70%|███████   | 1765/2520 [05:03<04:19,  2.91it/s]

{'eval_loss': 0.7444340586662292, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 0.9194, 'eval_samples_per_second': 165.324, 'eval_steps_per_second': 20.666, 'epoch': 28.0}


 72%|███████▎  | 1827/2520 [05:12<01:46,  6.51it/s]

{'loss': 0.0001, 'learning_rate': 2.7500000000000004e-06, 'epoch': 29.0}


                                                   
 73%|███████▎  | 1828/2520 [05:13<03:57,  2.92it/s]

{'eval_loss': 0.7476401329040527, 'eval_accuracy': 0.868421052631579, 'eval_runtime': 0.9222, 'eval_samples_per_second': 164.82, 'eval_steps_per_second': 20.602, 'epoch': 29.0}


 75%|███████▌  | 1890/2520 [05:23<01:37,  6.46it/s]

{'loss': 0.0001, 'learning_rate': 2.5e-06, 'epoch': 30.0}


                                                   
 75%|███████▌  | 1891/2520 [05:24<03:32,  2.96it/s]

{'eval_loss': 0.7804424166679382, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 0.9053, 'eval_samples_per_second': 167.898, 'eval_steps_per_second': 20.987, 'epoch': 30.0}


 78%|███████▊  | 1953/2520 [05:33<01:26,  6.56it/s]

{'loss': 0.0001, 'learning_rate': 2.25e-06, 'epoch': 31.0}


                                                   
 78%|███████▊  | 1954/2520 [05:34<03:10,  2.97it/s]

{'eval_loss': 0.7837942838668823, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 0.9011, 'eval_samples_per_second': 168.687, 'eval_steps_per_second': 21.086, 'epoch': 31.0}


 80%|████████  | 2016/2520 [05:44<01:17,  6.53it/s]

{'loss': 0.0001, 'learning_rate': 2.0000000000000003e-06, 'epoch': 32.0}


                                                   
 80%|████████  | 2017/2520 [05:45<02:51,  2.92it/s]

{'eval_loss': 0.7870989441871643, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 0.9193, 'eval_samples_per_second': 165.344, 'eval_steps_per_second': 20.668, 'epoch': 32.0}


 82%|████████▎ | 2079/2520 [05:54<01:08,  6.48it/s]

{'loss': 0.0001, 'learning_rate': 1.75e-06, 'epoch': 33.0}


                                                   
 83%|████████▎ | 2080/2520 [05:55<02:30,  2.93it/s]

{'eval_loss': 0.7896933555603027, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 0.9153, 'eval_samples_per_second': 166.067, 'eval_steps_per_second': 20.758, 'epoch': 33.0}


 85%|████████▌ | 2142/2520 [06:05<00:57,  6.55it/s]

{'loss': 0.0001, 'learning_rate': 1.5e-06, 'epoch': 34.0}


                                                   
 85%|████████▌ | 2143/2520 [06:06<02:07,  2.95it/s]

{'eval_loss': 0.7932947278022766, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 0.9117, 'eval_samples_per_second': 166.722, 'eval_steps_per_second': 20.84, 'epoch': 34.0}


 88%|████████▊ | 2205/2520 [06:15<00:48,  6.50it/s]

{'loss': 0.0001, 'learning_rate': 1.25e-06, 'epoch': 35.0}


                                                   
 88%|████████▊ | 2206/2520 [06:16<01:47,  2.91it/s]

{'eval_loss': 0.7947858572006226, 'eval_accuracy': 0.881578947368421, 'eval_runtime': 0.927, 'eval_samples_per_second': 163.966, 'eval_steps_per_second': 20.496, 'epoch': 35.0}


 90%|█████████ | 2268/2520 [06:26<00:38,  6.56it/s]

{'loss': 0.0001, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


                                                   
 90%|█████████ | 2269/2520 [06:27<01:24,  2.97it/s]

{'eval_loss': 0.8143205046653748, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 0.8998, 'eval_samples_per_second': 168.92, 'eval_steps_per_second': 21.115, 'epoch': 36.0}


 92%|█████████▎| 2331/2520 [06:36<00:29,  6.44it/s]

{'loss': 0.0001, 'learning_rate': 7.5e-07, 'epoch': 37.0}


                                                   
 93%|█████████▎| 2332/2520 [06:37<01:04,  2.92it/s]

{'eval_loss': 0.8150469064712524, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 0.9186, 'eval_samples_per_second': 165.469, 'eval_steps_per_second': 20.684, 'epoch': 37.0}


 95%|█████████▌| 2394/2520 [06:47<00:19,  6.60it/s]

{'loss': 0.0001, 'learning_rate': 5.000000000000001e-07, 'epoch': 38.0}


                                                   
 95%|█████████▌| 2395/2520 [06:48<00:41,  2.98it/s]

{'eval_loss': 0.8148327469825745, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 0.9021, 'eval_samples_per_second': 168.489, 'eval_steps_per_second': 21.061, 'epoch': 38.0}


 98%|█████████▊| 2457/2520 [06:57<00:09,  6.57it/s]

{'loss': 0.0001, 'learning_rate': 2.5000000000000004e-07, 'epoch': 39.0}


                                                   
 98%|█████████▊| 2458/2520 [06:58<00:21,  2.95it/s]

{'eval_loss': 0.8152855038642883, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 0.9089, 'eval_samples_per_second': 167.227, 'eval_steps_per_second': 20.903, 'epoch': 39.0}


100%|██████████| 2520/2520 [07:08<00:00,  6.45it/s]

{'loss': 0.0001, 'learning_rate': 0.0, 'epoch': 40.0}


                                                   
100%|██████████| 2520/2520 [07:09<00:00,  5.86it/s]

{'eval_loss': 0.8153729438781738, 'eval_accuracy': 0.8947368421052632, 'eval_runtime': 0.9104, 'eval_samples_per_second': 166.955, 'eval_steps_per_second': 20.869, 'epoch': 40.0}
{'train_runtime': 429.6911, 'train_samples_per_second': 46.266, 'train_steps_per_second': 5.865, 'train_loss': 0.03589524636229884, 'epoch': 40.0}
Execution Time : 430 seconds





In [15]:
# Evaluate the trained model once more on the evaluation set given to the
# Trainer; the reported loss/accuracy match the final-epoch evaluation above.
trainer.evaluate()

100%|██████████| 19/19 [00:00<00:00, 22.15it/s]


{'eval_loss': 0.8153729438781738,
 'eval_accuracy': 0.8947368421052632,
 'eval_runtime': 0.9046,
 'eval_samples_per_second': 168.038,
 'eval_steps_per_second': 21.005,
 'epoch': 40.0}