In [1]:
import pandas as pd

# Directory name used for this run's checkpoints (interpolated into the
# TrainingArguments output_dir further down).
finetuned_dirname = "40-epoch-distilbert-base-finetuned-phemernr2-tf"

# Read the processed CSV, keep only the text / split / label columns, and
# lowercase the tweet text -- expressed as one chain producing a new frame.
data = (
    pd.read_csv("../../data/processed/phemernr2-tf_dataset.csv", sep=",")
    .loc[:, ['tweet_text', 'tvt2', 'label']]
    .assign(tweet_text=lambda frame: frame['tweet_text'].str.lower())
)
print(data.shape)
data.head()

(1705, 3)


Unnamed: 0,tweet_text,tvt2,label
0,breaking - a germanwings airbus a320 plane rep...,training,True
1,reports that two of the dead in the #charliehe...,training,True
2,'no survivors' in #germanwings crash says fren...,training,False
3,tragedy mounts as soldier shot this am dies of...,training,True
4,watch the moment gunfire and explosions were h...,training,True


In [2]:
# Alias, not a copy: `combined_data` and `data` refer to the same DataFrame object.
combined_data = data

In [3]:
import torch

class CustomTextDataset(torch.utils.data.dataset.Dataset):
    """Map-style dataset pairing raw texts with integer labels.

    Until `tokenize()` has been called, the `attention_mask` and `input_ids`
    entries of every sample are None. After it, they hold the per-text values
    returned by the tokenizer.
    """

    def __init__(self, texts, labels):
        """Store the parallel `texts` / `labels` sequences; nothing is tokenized yet."""
        self.texts = texts
        self.labels = labels
        self.input_ids = None
        self.attention_mask = None
        self.token_type_ids = None

    def __len__(self):
        """Number of samples, taken from the label sequence."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample `idx` as a dict with keys text, label, attention_mask, input_ids."""
        item = {"text": self.texts[idx], "label": self.labels[idx]}
        # Encoded fields fall back to None while their storage is unset or empty.
        for field in ("attention_mask", "input_ids"):
            encoded = getattr(self, field)
            item[field] = encoded[idx] if encoded else None
        return item

    def tokenize(self, tokenizer):
        """Encode every text with `tokenizer` and cache the results on the instance.

        `tokenizer` is called once per text with padding="max_length" and
        truncation=True; its 'attention_mask' and 'input_ids' entries are kept.
        `token_type_ids` is reset to an empty list and left unpopulated.
        """
        self.attention_mask, self.input_ids, self.token_type_ids = [], [], []
        masks, ids = self.attention_mask, self.input_ids

        for raw_text in self.texts:
            encoded = tokenizer(raw_text, padding="max_length", truncation=True)
            masks.append(encoded['attention_mask'])
            ids.append(encoded['input_ids'])

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Encode every label value as an integer id, numbered in order of first
# appearance in the column (so the first distinct value seen becomes 0).
labels_str = combined_data['label'].unique().tolist()

# A dict lookup over the column replaces the previous row-by-row
# `iterrows()` + `list.index` scan; the resulting ids are the same.
label_to_id = {value: position for position, value in enumerate(labels_str)}
labels = [label_to_id[value] for value in combined_data['label'].tolist()]

print(len(labels))
labels[:10]

1705


[0, 0, 1, 0, 0, 0, 0, 1, 0, 0]

In [5]:
def _make_split_dataset(split_name):
    """Build a CustomTextDataset from the rows whose `tvt2` column equals `split_name`.

    Rows and `labels` are paired by position. Previously the DataFrame index
    label was used as a list index into `labels`, which is only correct when
    the frame has a default 0..n-1 index.
    """
    assert len(labels) == len(combined_data), "labels must have one entry per row"
    in_split = (combined_data['tvt2'] == split_name).tolist()
    all_texts = combined_data['tweet_text'].tolist()
    split_texts = [text for text, keep in zip(all_texts, in_split) if keep]
    split_labels = [label for label, keep in zip(labels, in_split) if keep]
    return CustomTextDataset(split_texts, split_labels)


train_dataset = _make_split_dataset('training')
# NOTE: despite its name, `test_dataset` is built from the 'validation' split.
test_dataset = _make_split_dataset('validation')
train_dataset[0]

{'text': 'breaking - a germanwings airbus a320 plane reportedly crashed in the region of digne (french alps) #flightradar24 - french tv #itele',
 'label': 0,
 'attention_mask': None,
 'input_ids': None}

In [6]:
from transformers import AutoTokenizer

# Load the tokenizer for the "distilbert-base-cased" checkpoint.
# NOTE(review): `tweet_text` is lowercased when the data is loaded, while this
# is the cased checkpoint -- confirm that combination is intended.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")

In [7]:
# inputs = tokenizer(["you're stuck in a timewrap from 2004 though", "summa lumma dumma lumma"], padding="max_length", truncation=True)
# for k,v in inputs.items():
#     print(k)

In [8]:
def tokenize_function(examples):
    """Encode `examples["text"]` with the notebook-level `tokenizer`.

    The texts are padded to the maximum length and truncated; the tokenizer's
    return value is passed back unchanged.
    """
    batch_texts = examples["text"]
    encode_options = {"padding": "max_length", "truncation": True}
    return tokenizer(batch_texts, **encode_options)

# Populate attention_mask / input_ids on both datasets, training split first.
for _split_dataset in (train_dataset, test_dataset):
    _split_dataset.tokenize(tokenizer)

In [9]:
# Report the number of samples in each dataset, one per line (train, then eval).
for _sized_dataset in (train_dataset, test_dataset):
    print(len(_sized_dataset))

1176
371


### Fine Tuning

In [10]:
from transformers import AutoModelForSequenceClassification

# DistilBERT (cased) with a two-class sequence-classification head. The
# recorded load message reports the classifier weights as newly initialized,
# so the head is trained from scratch during fine-tuning.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-cased",
    num_labels=2,
    output_hidden_states=False,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-cased and are newly initialized: ['classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
from transformers import TrainingArguments

epochs = 40
batch_size = 8

# Batches per epoch = ceil(n_samples / batch_size), via exact integer ceiling
# division. The previous `round(x + 0.49)` rounds *down* whenever the
# fractional part is below 0.01 (e.g. 129 samples at batch size 128 gave 1
# instead of 2).
# NOTE(review): assumes one device and no gradient accumulation -- confirm.
steps_per_epoch = -(-len(train_dataset) // batch_size)

# Total number of training steps, so a checkpoint is written only once, at
# the very end of training.
save_steps = steps_per_epoch * epochs
# save_steps = 1_000_000

training_args = TrainingArguments(
    output_dir=f"../../data/models/{finetuned_dirname}",
    num_train_epochs=epochs,
    save_steps=save_steps,
    # NOTE(review): presumably ignored because logging_strategy="epoch" below -- confirm.
    logging_steps=300,
    learning_rate=1e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    logging_strategy="epoch"
)

print(f"Save Steps : {save_steps}")

Save Steps : 5880


In [12]:
import numpy as np
from datasets import load_metric

# Accuracy metric object; `compute_metrics` below calls its `.compute(...)`.
# NOTE(review): this line is echoed as a warning in the recorded output, and
# `datasets.load_metric` is deprecated in favour of the separate `evaluate`
# package -- confirm against the installed `datasets` version.
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Score one evaluation pass with the notebook-level `metric`.

    `eval_pred` unpacks into (logits, labels). The predicted class for each
    row is the argmax over the last axis of the logits, and the result of
    `metric.compute` is returned unchanged.
    """
    raw_scores, gold_labels = eval_pred
    predicted_ids = np.argmax(raw_scores, axis=-1)
    result = metric.compute(references=gold_labels, predictions=predicted_ids)
    return result

  metric = load_metric("accuracy")


In [13]:
from transformers import Trainer

# Wire model, hyper-parameters, data and the metric callback into a Trainer.
# `eval_dataset` receives `test_dataset`, which is evaluated once per epoch
# according to the training arguments.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

In [14]:
import time

# Time the full fine-tuning run. perf_counter() is a monotonic clock, so the
# measured duration cannot be skewed by system clock adjustments during the
# run, unlike differences of time.time().
start = time.perf_counter()

trainer.train()

print(f"Execution Time : {round(time.perf_counter() - start)} seconds")

  2%|▎         | 147/5880 [00:25<15:19,  6.24it/s]

{'loss': 0.5557, 'learning_rate': 9.75e-06, 'epoch': 1.0}


                                                  
  3%|▎         | 148/5880 [00:27<1:22:23,  1.16it/s]

{'eval_loss': 0.42117857933044434, 'eval_accuracy': 0.8274932614555256, 'eval_runtime': 2.3316, 'eval_samples_per_second': 159.12, 'eval_steps_per_second': 20.158, 'epoch': 1.0}


  5%|▌         | 294/5880 [00:51<15:03,  6.19it/s]  

{'loss': 0.322, 'learning_rate': 9.5e-06, 'epoch': 2.0}


                                                  
  5%|▌         | 295/5880 [00:53<1:20:24,  1.16it/s]

{'eval_loss': 0.33615171909332275, 'eval_accuracy': 0.8652291105121294, 'eval_runtime': 2.3323, 'eval_samples_per_second': 159.068, 'eval_steps_per_second': 20.151, 'epoch': 2.0}


  8%|▊         | 441/5880 [01:17<14:42,  6.16it/s]  

{'loss': 0.219, 'learning_rate': 9.250000000000001e-06, 'epoch': 3.0}


                                                  
  8%|▊         | 442/5880 [01:19<1:18:18,  1.16it/s]

{'eval_loss': 0.4616102874279022, 'eval_accuracy': 0.8679245283018868, 'eval_runtime': 2.3339, 'eval_samples_per_second': 158.959, 'eval_steps_per_second': 20.138, 'epoch': 3.0}


 10%|█         | 588/5880 [01:43<14:14,  6.19it/s]  

{'loss': 0.1641, 'learning_rate': 9e-06, 'epoch': 4.0}


                                                  
 10%|█         | 589/5880 [01:45<1:16:06,  1.16it/s]

{'eval_loss': 0.5803053975105286, 'eval_accuracy': 0.8490566037735849, 'eval_runtime': 2.3313, 'eval_samples_per_second': 159.137, 'eval_steps_per_second': 20.16, 'epoch': 4.0}


 12%|█▎        | 735/5880 [02:09<14:03,  6.10it/s]  

{'loss': 0.0935, 'learning_rate': 8.750000000000001e-06, 'epoch': 5.0}


                                                  
 13%|█▎        | 736/5880 [02:12<1:14:17,  1.15it/s]

{'eval_loss': 0.6238930821418762, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 2.3402, 'eval_samples_per_second': 158.531, 'eval_steps_per_second': 20.083, 'epoch': 5.0}


 15%|█▌        | 882/5880 [02:35<13:26,  6.19it/s]  

{'loss': 0.0482, 'learning_rate': 8.5e-06, 'epoch': 6.0}


                                                  
 15%|█▌        | 883/5880 [02:38<1:11:50,  1.16it/s]

{'eval_loss': 0.7201200127601624, 'eval_accuracy': 0.876010781671159, 'eval_runtime': 2.3307, 'eval_samples_per_second': 159.18, 'eval_steps_per_second': 20.166, 'epoch': 6.0}


 18%|█▊        | 1029/5880 [03:01<12:53,  6.27it/s] 

{'loss': 0.0324, 'learning_rate': 8.25e-06, 'epoch': 7.0}


                                                   
 18%|█▊        | 1030/5880 [03:04<1:09:07,  1.17it/s]

{'eval_loss': 0.6804285049438477, 'eval_accuracy': 0.8814016172506739, 'eval_runtime': 2.311, 'eval_samples_per_second': 160.535, 'eval_steps_per_second': 20.337, 'epoch': 7.0}


 20%|██        | 1176/5880 [03:27<12:31,  6.26it/s]  

{'loss': 0.0208, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.0}


                                                   
 20%|██        | 1177/5880 [03:30<1:07:03,  1.17it/s]

{'eval_loss': 0.7625241875648499, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 2.3087, 'eval_samples_per_second': 160.697, 'eval_steps_per_second': 20.358, 'epoch': 8.0}


 22%|██▎       | 1323/5880 [03:53<12:03,  6.29it/s]  

{'loss': 0.016, 'learning_rate': 7.75e-06, 'epoch': 9.0}


                                                   
 23%|██▎       | 1324/5880 [03:55<1:05:10,  1.17it/s]

{'eval_loss': 0.7565612196922302, 'eval_accuracy': 0.8921832884097035, 'eval_runtime': 2.3145, 'eval_samples_per_second': 160.297, 'eval_steps_per_second': 20.307, 'epoch': 9.0}


 25%|██▌       | 1470/5880 [04:19<11:46,  6.24it/s]  

{'loss': 0.0146, 'learning_rate': 7.500000000000001e-06, 'epoch': 10.0}


                                                   
 25%|██▌       | 1471/5880 [04:21<1:03:00,  1.17it/s]

{'eval_loss': 0.9642531275749207, 'eval_accuracy': 0.8598382749326146, 'eval_runtime': 2.3162, 'eval_samples_per_second': 160.175, 'eval_steps_per_second': 20.292, 'epoch': 10.0}


 28%|██▊       | 1617/5880 [04:45<11:25,  6.22it/s]  

{'loss': 0.0084, 'learning_rate': 7.25e-06, 'epoch': 11.0}


                                                   
 28%|██▊       | 1618/5880 [04:47<1:00:56,  1.17it/s]

{'eval_loss': 0.8331313729286194, 'eval_accuracy': 0.876010781671159, 'eval_runtime': 2.3197, 'eval_samples_per_second': 159.936, 'eval_steps_per_second': 20.261, 'epoch': 11.0}


 30%|███       | 1764/5880 [05:11<10:57,  6.26it/s]  

{'loss': 0.0032, 'learning_rate': 7e-06, 'epoch': 12.0}


                                                   
 30%|███       | 1765/5880 [05:13<58:51,  1.17it/s]

{'eval_loss': 0.9226092100143433, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 2.3219, 'eval_samples_per_second': 159.784, 'eval_steps_per_second': 20.242, 'epoch': 12.0}


 32%|███▎      | 1911/5880 [05:36<10:32,  6.27it/s]

{'loss': 0.0014, 'learning_rate': 6.750000000000001e-06, 'epoch': 13.0}


                                                   
 33%|███▎      | 1912/5880 [05:39<56:30,  1.17it/s]

{'eval_loss': 0.8899511694908142, 'eval_accuracy': 0.8733153638814016, 'eval_runtime': 2.3091, 'eval_samples_per_second': 160.671, 'eval_steps_per_second': 20.355, 'epoch': 13.0}


 35%|███▌      | 2058/5880 [06:02<10:10,  6.26it/s]

{'loss': 0.0093, 'learning_rate': 6.5000000000000004e-06, 'epoch': 14.0}


                                                   
 35%|███▌      | 2059/5880 [06:05<54:26,  1.17it/s]

{'eval_loss': 0.8062238097190857, 'eval_accuracy': 0.8921832884097035, 'eval_runtime': 2.3101, 'eval_samples_per_second': 160.6, 'eval_steps_per_second': 20.345, 'epoch': 14.0}


 38%|███▊      | 2205/5880 [06:28<09:50,  6.22it/s]

{'loss': 0.0001, 'learning_rate': 6.25e-06, 'epoch': 15.0}


                                                   
 38%|███▊      | 2206/5880 [06:30<52:23,  1.17it/s]

{'eval_loss': 0.8341721892356873, 'eval_accuracy': 0.8814016172506739, 'eval_runtime': 2.311, 'eval_samples_per_second': 160.539, 'eval_steps_per_second': 20.338, 'epoch': 15.0}


 40%|████      | 2352/5880 [06:54<09:21,  6.28it/s]

{'loss': 0.0013, 'learning_rate': 6e-06, 'epoch': 16.0}


                                                   
 40%|████      | 2353/5880 [06:56<50:16,  1.17it/s]

{'eval_loss': 0.9864006638526917, 'eval_accuracy': 0.8706199460916442, 'eval_runtime': 2.3122, 'eval_samples_per_second': 160.453, 'eval_steps_per_second': 20.327, 'epoch': 16.0}


 42%|████▎     | 2499/5880 [07:20<08:59,  6.27it/s]

{'loss': 0.0182, 'learning_rate': 5.75e-06, 'epoch': 17.0}


                                                   
 43%|████▎     | 2500/5880 [07:22<48:06,  1.17it/s]

{'eval_loss': 0.9624625444412231, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 2.308, 'eval_samples_per_second': 160.747, 'eval_steps_per_second': 20.364, 'epoch': 17.0}


 45%|████▌     | 2646/5880 [07:45<08:34,  6.29it/s]

{'loss': 0.0123, 'learning_rate': 5.500000000000001e-06, 'epoch': 18.0}


                                                   
 45%|████▌     | 2647/5880 [07:48<46:05,  1.17it/s]

{'eval_loss': 1.1528797149658203, 'eval_accuracy': 0.8571428571428571, 'eval_runtime': 2.3127, 'eval_samples_per_second': 160.418, 'eval_steps_per_second': 20.323, 'epoch': 18.0}


 48%|████▊     | 2793/5880 [08:11<08:13,  6.25it/s]

{'loss': 0.0201, 'learning_rate': 5.2500000000000006e-06, 'epoch': 19.0}


                                                   
 48%|████▊     | 2794/5880 [08:14<44:00,  1.17it/s]

{'eval_loss': 1.1016918420791626, 'eval_accuracy': 0.8733153638814016, 'eval_runtime': 2.3117, 'eval_samples_per_second': 160.49, 'eval_steps_per_second': 20.332, 'epoch': 19.0}


 50%|█████     | 2940/5880 [08:37<07:47,  6.28it/s]

{'loss': 0.0006, 'learning_rate': 5e-06, 'epoch': 20.0}


                                                   
 50%|█████     | 2941/5880 [08:39<41:49,  1.17it/s]

{'eval_loss': 1.0796613693237305, 'eval_accuracy': 0.8679245283018868, 'eval_runtime': 2.3088, 'eval_samples_per_second': 160.69, 'eval_steps_per_second': 20.357, 'epoch': 20.0}


 52%|█████▎    | 3087/5880 [09:03<07:26,  6.26it/s]

{'loss': 0.0016, 'learning_rate': 4.75e-06, 'epoch': 21.0}


                                                   
 53%|█████▎    | 3088/5880 [09:05<39:47,  1.17it/s]

{'eval_loss': 1.0354347229003906, 'eval_accuracy': 0.8814016172506739, 'eval_runtime': 2.3094, 'eval_samples_per_second': 160.645, 'eval_steps_per_second': 20.351, 'epoch': 21.0}


 55%|█████▌    | 3234/5880 [09:29<07:06,  6.20it/s]

{'loss': 0.0001, 'learning_rate': 4.5e-06, 'epoch': 22.0}


                                                   
 55%|█████▌    | 3235/5880 [09:31<37:47,  1.17it/s]

{'eval_loss': 1.0759793519973755, 'eval_accuracy': 0.876010781671159, 'eval_runtime': 2.3169, 'eval_samples_per_second': 160.131, 'eval_steps_per_second': 20.286, 'epoch': 22.0}


 57%|█████▊    | 3381/5880 [09:54<06:41,  6.22it/s]

{'loss': 0.0118, 'learning_rate': 4.25e-06, 'epoch': 23.0}


                                                   
 58%|█████▊    | 3382/5880 [09:57<35:39,  1.17it/s]

{'eval_loss': 0.9240576028823853, 'eval_accuracy': 0.894878706199461, 'eval_runtime': 2.3145, 'eval_samples_per_second': 160.294, 'eval_steps_per_second': 20.307, 'epoch': 23.0}


 60%|██████    | 3528/5880 [10:20<06:17,  6.24it/s]

{'loss': 0.0075, 'learning_rate': 4.000000000000001e-06, 'epoch': 24.0}


                                                   
 60%|██████    | 3529/5880 [10:23<33:29,  1.17it/s]

{'eval_loss': 1.0259507894515991, 'eval_accuracy': 0.8840970350404312, 'eval_runtime': 2.3105, 'eval_samples_per_second': 160.574, 'eval_steps_per_second': 20.342, 'epoch': 24.0}


 62%|██████▎   | 3675/5880 [10:46<05:52,  6.26it/s]

{'loss': 0.0, 'learning_rate': 3.7500000000000005e-06, 'epoch': 25.0}


                                                   
 63%|██████▎   | 3676/5880 [10:49<31:26,  1.17it/s]

{'eval_loss': 0.9125037789344788, 'eval_accuracy': 0.8840970350404312, 'eval_runtime': 2.313, 'eval_samples_per_second': 160.399, 'eval_steps_per_second': 20.32, 'epoch': 25.0}


 65%|██████▌   | 3822/5880 [11:12<05:28,  6.26it/s]

{'loss': 0.0009, 'learning_rate': 3.5e-06, 'epoch': 26.0}


                                                   
 65%|██████▌   | 3823/5880 [11:15<29:20,  1.17it/s]

{'eval_loss': 1.0085195302963257, 'eval_accuracy': 0.8867924528301887, 'eval_runtime': 2.3142, 'eval_samples_per_second': 160.313, 'eval_steps_per_second': 20.309, 'epoch': 26.0}


 68%|██████▊   | 3969/5880 [11:38<05:04,  6.28it/s]

{'loss': 0.0, 'learning_rate': 3.2500000000000002e-06, 'epoch': 27.0}


                                                   
 68%|██████▊   | 3970/5880 [11:40<27:14,  1.17it/s]

{'eval_loss': 0.8515820503234863, 'eval_accuracy': 0.8975741239892183, 'eval_runtime': 2.3145, 'eval_samples_per_second': 160.293, 'eval_steps_per_second': 20.307, 'epoch': 27.0}


 70%|███████   | 4116/5880 [12:04<04:41,  6.26it/s]

{'loss': 0.0058, 'learning_rate': 3e-06, 'epoch': 28.0}


                                                   
 70%|███████   | 4117/5880 [12:06<25:14,  1.16it/s]

{'eval_loss': 1.0626418590545654, 'eval_accuracy': 0.8867924528301887, 'eval_runtime': 2.3228, 'eval_samples_per_second': 159.722, 'eval_steps_per_second': 20.234, 'epoch': 28.0}


 72%|███████▎  | 4263/5880 [12:30<04:21,  6.17it/s]

{'loss': 0.0, 'learning_rate': 2.7500000000000004e-06, 'epoch': 29.0}


                                                   
 73%|███████▎  | 4264/5880 [12:32<23:19,  1.15it/s]

{'eval_loss': 1.0375341176986694, 'eval_accuracy': 0.8787061994609164, 'eval_runtime': 2.3395, 'eval_samples_per_second': 158.578, 'eval_steps_per_second': 20.089, 'epoch': 29.0}


 75%|███████▌  | 4410/5880 [12:56<03:57,  6.18it/s]

{'loss': 0.0, 'learning_rate': 2.5e-06, 'epoch': 30.0}


                                                   
 75%|███████▌  | 4411/5880 [12:58<21:06,  1.16it/s]

{'eval_loss': 1.1193710565567017, 'eval_accuracy': 0.8840970350404312, 'eval_runtime': 2.3291, 'eval_samples_per_second': 159.289, 'eval_steps_per_second': 20.18, 'epoch': 30.0}


 78%|███████▊  | 4557/5880 [13:22<03:33,  6.19it/s]

{'loss': 0.0, 'learning_rate': 2.25e-06, 'epoch': 31.0}


                                                   
 78%|███████▊  | 4558/5880 [13:25<19:05,  1.15it/s]

{'eval_loss': 1.149750828742981, 'eval_accuracy': 0.876010781671159, 'eval_runtime': 2.3428, 'eval_samples_per_second': 158.355, 'eval_steps_per_second': 20.061, 'epoch': 31.0}


 80%|████████  | 4704/5880 [13:48<03:08,  6.22it/s]

{'loss': 0.0, 'learning_rate': 2.0000000000000003e-06, 'epoch': 32.0}


                                                   
 80%|████████  | 4705/5880 [13:50<16:47,  1.17it/s]

{'eval_loss': 1.1051710844039917, 'eval_accuracy': 0.8814016172506739, 'eval_runtime': 2.3176, 'eval_samples_per_second': 160.08, 'eval_steps_per_second': 20.28, 'epoch': 32.0}


 82%|████████▎ | 4851/5880 [14:14<02:44,  6.25it/s]

{'loss': 0.0, 'learning_rate': 1.75e-06, 'epoch': 33.0}


                                                   
 83%|████████▎ | 4852/5880 [14:16<14:40,  1.17it/s]

{'eval_loss': 1.0290101766586304, 'eval_accuracy': 0.8921832884097035, 'eval_runtime': 2.3108, 'eval_samples_per_second': 160.553, 'eval_steps_per_second': 20.34, 'epoch': 33.0}


 85%|████████▌ | 4998/5880 [14:40<02:21,  6.24it/s]

{'loss': 0.002, 'learning_rate': 1.5e-06, 'epoch': 34.0}


                                                   
 85%|████████▌ | 4999/5880 [14:42<12:38,  1.16it/s]

{'eval_loss': 0.9981809258460999, 'eval_accuracy': 0.8921832884097035, 'eval_runtime': 2.3296, 'eval_samples_per_second': 159.254, 'eval_steps_per_second': 20.175, 'epoch': 34.0}


 88%|████████▊ | 5145/5880 [15:06<01:57,  6.27it/s]

{'loss': 0.0, 'learning_rate': 1.25e-06, 'epoch': 35.0}


                                                   
 88%|████████▊ | 5146/5880 [15:08<10:30,  1.16it/s]

{'eval_loss': 1.0165244340896606, 'eval_accuracy': 0.894878706199461, 'eval_runtime': 2.319, 'eval_samples_per_second': 159.986, 'eval_steps_per_second': 20.268, 'epoch': 35.0}


 90%|█████████ | 5292/5880 [15:31<01:34,  6.22it/s]

{'loss': 0.0, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


                                                   
 90%|█████████ | 5293/5880 [15:34<08:24,  1.16it/s]

{'eval_loss': 0.9969713687896729, 'eval_accuracy': 0.8921832884097035, 'eval_runtime': 2.3218, 'eval_samples_per_second': 159.787, 'eval_steps_per_second': 20.243, 'epoch': 36.0}


 92%|█████████▎| 5439/5880 [15:57<01:11,  6.20it/s]

{'loss': 0.0, 'learning_rate': 7.5e-07, 'epoch': 37.0}


                                                   
 93%|█████████▎| 5440/5880 [16:00<06:16,  1.17it/s]

{'eval_loss': 0.9325935244560242, 'eval_accuracy': 0.8921832884097035, 'eval_runtime': 2.3121, 'eval_samples_per_second': 160.462, 'eval_steps_per_second': 20.328, 'epoch': 37.0}


 95%|█████████▌| 5586/5880 [16:23<00:47,  6.24it/s]

{'loss': 0.0, 'learning_rate': 5.000000000000001e-07, 'epoch': 38.0}


                                                   
 95%|█████████▌| 5587/5880 [16:26<04:10,  1.17it/s]

{'eval_loss': 0.9371054768562317, 'eval_accuracy': 0.9002695417789758, 'eval_runtime': 2.3154, 'eval_samples_per_second': 160.229, 'eval_steps_per_second': 20.299, 'epoch': 38.0}


 98%|█████████▊| 5733/5880 [16:49<00:23,  6.22it/s]

{'loss': 0.0, 'learning_rate': 2.5000000000000004e-07, 'epoch': 39.0}


                                                   
 98%|█████████▊| 5734/5880 [16:51<02:04,  1.17it/s]

{'eval_loss': 0.9428479671478271, 'eval_accuracy': 0.9002695417789758, 'eval_runtime': 2.3021, 'eval_samples_per_second': 161.157, 'eval_steps_per_second': 20.416, 'epoch': 39.0}


100%|██████████| 5880/5880 [17:15<00:00,  6.26it/s]

{'loss': 0.0, 'learning_rate': 0.0, 'epoch': 40.0}


                                                   
100%|██████████| 5880/5880 [17:18<00:00,  5.66it/s]

{'eval_loss': 0.94346022605896, 'eval_accuracy': 0.9002695417789758, 'eval_runtime': 2.2977, 'eval_samples_per_second': 161.467, 'eval_steps_per_second': 20.455, 'epoch': 40.0}
{'train_runtime': 1038.244, 'train_samples_per_second': 45.307, 'train_steps_per_second': 5.663, 'train_loss': 0.039785250794533704, 'epoch': 40.0}
Execution Time : 1038 seconds





In [15]:
# Evaluate the trained model on the `eval_dataset` given to the Trainer; the
# recorded metrics match the epoch-40 evaluation logged during training.
trainer.evaluate()

100%|██████████| 47/47 [00:02<00:00, 20.98it/s]


{'eval_loss': 0.94346022605896,
 'eval_accuracy': 0.9002695417789758,
 'eval_runtime': 2.2891,
 'eval_samples_per_second': 162.073,
 'eval_steps_per_second': 20.532,
 'epoch': 40.0}