In [1]:
import pandas as pd

# Name of the sub-directory that the training cell uses as the model output
# location (it is interpolated into `output_dir` of TrainingArguments).
finetuned_dirname = "40-epoch-distilbert-base-finetuned-phemernr2-rnr"

# Read the comma-separated dataset file into a DataFrame.
dataset_csv = "../../data/phemernr2_dataset_with_tvt.csv"
data = pd.read_csv(dataset_csv, sep=",")

# Disabled column selection / renaming, kept for reference:
# data = data[['tweet_text', 'tvt2', 'label2']]
# data.columns = ['tweet_text', 'tvt2', 'label']

# Print the (rows, columns) shape, then display the first rows.
print(data.shape)
data.head()

(6425, 5)


Unnamed: 0,tweet_id,tweet_text,label,tvt2,tvt2_1
0,552833795142209536,the east london mosque would like to offer its...,non-rumours,training,training
1,580318210609696769,breaking - a germanwings airbus a320 plane rep...,true,validation,testting
2,552798891994009601,reports that two of the dead in the #charliehe...,true,training,training
3,576790814942236672,after #putin disappeared russian tv no longer ...,non-rumours,validation,training
4,499678822598340608,saw #ferguson for myself. #justiceformichaelbr...,non-rumours,testting,testting


In [2]:
# Collapse every rumour sub-class into the single class 'rumors', leaving all
# other label values (e.g. 'non-rumours') untouched.
# NOTE(review): the original list only contained the spelling 'unverfied'.
# Both spellings are listed so the rows are collapsed whichever one the CSV
# uses; a value that is absent from the column is simply a no-op for
# `replace`. Confirm the actual spelling in the data and drop the other.
rumour_subclasses = ['true', 'unverified', 'unverfied', 'false']
data['label'] = data['label'].replace(rumour_subclasses, 'rumors')

In [3]:
# Bind a second name to the same DataFrame object. No copy is made, so any
# modification made through `combined_data` is also visible through `data`.
combined_data = data

In [4]:
import torch

class CustomTextDataset(torch.utils.data.dataset.Dataset):
    """Dataset pairing raw texts with labels and, after :meth:`tokenize`,
    with the per-text ``input_ids`` and ``attention_mask``.

    Until :meth:`tokenize` has been called the token fields are ``None`` and
    items are returned with ``None`` in their place.
    """

    def __init__(self, texts, labels):
        """Store the inputs; no tokenization happens here.

        Args:
            texts: Indexable, iterable collection of input texts.
            labels: Indexable, sized collection of labels; ``labels[i]`` is
                returned together with ``texts[i]``.
        """
        self.labels = labels
        self.texts = texts
        self.attention_mask = None
        self.input_ids = None
        # Initialised for completeness; never filled or returned by this class.
        self.token_type_ids = None

    def __len__(self):
        """Return the number of labels held by the dataset."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a dict.

        Keys are ``"text"``, ``"label"``, ``"attention_mask"`` and
        ``"input_ids"``; the last two are ``None`` while the dataset has not
        been tokenized.
        """
        # Compare against None explicitly instead of relying on truthiness:
        # truth-testing a multi-element tensor/array raises, so the explicit
        # check keeps indexing working if the fields are stored that way.
        attention_mask = (
            self.attention_mask[idx] if self.attention_mask is not None else None
        )
        input_ids = self.input_ids[idx] if self.input_ids is not None else None

        return {
            "text": self.texts[idx],
            "label": self.labels[idx],
            "attention_mask": attention_mask,
            "input_ids": input_ids,
        }

    def tokenize(self, tokenizer):
        """Tokenize every text and store the results on the dataset.

        Args:
            tokenizer: Callable invoked once per text as
                ``tokenizer(text, padding="max_length", truncation=True)``;
                its result must support ``['attention_mask']`` and
                ``['input_ids']`` lookups.
        """
        # Accumulate into locals and assign at the end, so an exception raised
        # by the tokenizer part-way through does not leave the dataset with
        # partially filled token fields.
        attention_masks = []
        input_ids = []

        for text in self.texts:
            encoded = tokenizer(text, padding="max_length", truncation=True)
            attention_masks.append(encoded['attention_mask'])
            input_ids.append(encoded['input_ids'])

        self.attention_mask = attention_masks
        self.input_ids = input_ids
        # Reset to an empty list, as before; it is not populated here.
        self.token_type_ids = []

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Distinct label strings present in the data (not used by the encoding below).
labels_str = combined_data['label'].unique().tolist()

# Binary target per row, in row order: 1 when the label is "non-rumours",
# 0 for every other value. A single vectorized comparison replaces the
# row-by-row iterrows() loop; `.tolist()` yields plain Python ints.
labels = (combined_data['label'] == "non-rumours").astype(int).tolist()

print(len(labels))
labels[:10]

6425


[1, 0, 0, 1, 1, 1, 1, 0, 1, 1]

In [6]:
def _make_split_dataset(split_name):
    """Build a CustomTextDataset from the rows whose 'tvt2' equals `split_name`.

    Texts and labels are selected with the same row mask. `labels` is paired
    with the rows by position, so the result does not depend on the DataFrame
    index happening to be 0..n-1 (indexing `labels` with the index label did).
    """
    assert len(labels) == len(combined_data), "labels must have one entry per row"
    in_split = (combined_data['tvt2'] == split_name).tolist()
    split_texts = [
        text for text, keep in zip(combined_data['tweet_text'], in_split) if keep
    ]
    split_labels = [label for label, keep in zip(labels, in_split) if keep]
    return CustomTextDataset(split_texts, split_labels)


train_dataset = _make_split_dataset('training')
# NOTE: despite its name, `test_dataset` holds the rows marked 'validation'.
test_dataset = _make_split_dataset('validation')
train_dataset[0]

{'text': 'the east london mosque would like to offer its sincere condolences to the families of those killed during the #charliehebdo attacks (1/2)',
 'label': 1,
 'attention_mask': None,
 'input_ids': None}

In [7]:
from transformers import AutoTokenizer

# Load the tokenizer associated with the "distilbert-base-cased" checkpoint.
pretrained_name = "distilbert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(pretrained_name)

In [8]:
# inputs = tokenizer(["you're stuck in a timewrap from 2004 though", "summa lumma dumma lumma"], padding="max_length", truncation=True)
# for k,v in inputs.items():
#     print(k)

In [9]:
def tokenize_function(examples):
    """Run the notebook-level `tokenizer` on ``examples["text"]``.

    The tokenizer is called with ``padding="max_length"`` and
    ``truncation=True`` and its result is returned unchanged.
    """
    batch_texts = examples["text"]
    return tokenizer(batch_texts, padding="max_length", truncation=True)


# Fill in the token fields of both datasets, training split first.
for split_dataset in (train_dataset, test_dataset):
    split_dataset.tokenize(tokenizer)

In [10]:
# Report the number of samples in each dataset, one value per line.
print(len(train_dataset), len(test_dataset), sep="\n")

4336
1462


### Fine Tuning

In [11]:
from transformers import AutoModelForSequenceClassification

# Instantiate a sequence-classification model from the "distilbert-base-cased"
# checkpoint, configured for two labels and without hidden-state outputs.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-cased",
    num_labels=2,
    output_hidden_states=False,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-cased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
from transformers import TrainingArguments

epochs = 40
batch_size = 8

# Batches per epoch = ceil(len(train_dataset) / batch_size), computed with
# exact integer arithmetic. The previous `round(x + 0.49)` form under-counts
# whenever the fractional part of x is below 0.01 (e.g. 12801 samples with a
# batch size of 128 gave 100 instead of 101). For the values used here the
# result is unchanged.
# NOTE(review): this assumes one batch of `batch_size` per step, i.e. a single
# device and no gradient accumulation — confirm if the setup changes.
steps_per_epoch = (len(train_dataset) + batch_size - 1) // batch_size

# Save interval set to the step count of the whole run (all epochs).
save_steps = steps_per_epoch * epochs
# save_steps = 1_000_000

training_args = TrainingArguments(
    output_dir=f"../../data/models/{finetuned_dirname}",
    num_train_epochs=epochs,
    save_steps=save_steps,
    logging_steps=300,
    learning_rate=1e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    logging_strategy="epoch"
)

print(f"Save Steps : {save_steps}")

Save Steps : 21680


In [13]:
import numpy as np
from datasets import load_metric

# Metric object whose `compute` method is used by `compute_metrics` below.
metric = load_metric("accuracy")


def compute_metrics(eval_pred):
    """Score one evaluation pass with the notebook-level `metric`.

    Args:
        eval_pred: Unpackable pair ``(logits, labels)``.

    Returns:
        Whatever ``metric.compute`` returns for the arg-max class of each
        logits row (taken over the last axis) against the reference labels.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(references=references, predictions=predicted_classes)

  metric = load_metric("accuracy")


In [14]:
from transformers import Trainer

# Wire the model, the training arguments, both datasets and the metric
# callback into a Trainer. `test_dataset` is passed as the evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

In [15]:
import time

# Time the training run with a monotonic, high-resolution clock.
# `time.time()` follows the system wall clock, which can be adjusted while the
# run is in progress; `time.perf_counter()` is meant for measuring intervals.
start = time.perf_counter()

trainer.train()

print(f"Execution Time : {round(time.perf_counter() - start)} seconds")

  2%|▎         | 542/21680 [01:26<53:44,  6.56it/s]  

{'loss': 0.4694, 'learning_rate': 9.75e-06, 'epoch': 1.0}


                                                   
  3%|▎         | 543/21680 [01:35<16:18:22,  2.78s/it]

{'eval_loss': 0.377143532037735, 'eval_accuracy': 0.847469220246238, 'eval_runtime': 8.7312, 'eval_samples_per_second': 167.446, 'eval_steps_per_second': 20.959, 'epoch': 1.0}


  5%|▌         | 1084/21680 [02:58<52:25,  6.55it/s]  

{'loss': 0.3419, 'learning_rate': 9.5e-06, 'epoch': 2.0}


                                                    
  5%|▌         | 1085/21680 [03:07<15:45:09,  2.75s/it]

{'eval_loss': 0.3502204716205597, 'eval_accuracy': 0.8673050615595075, 'eval_runtime': 8.6581, 'eval_samples_per_second': 168.858, 'eval_steps_per_second': 21.136, 'epoch': 2.0}


  8%|▊         | 1626/21680 [04:29<50:51,  6.57it/s]   

{'loss': 0.2371, 'learning_rate': 9.250000000000001e-06, 'epoch': 3.0}


                                                    
  8%|▊         | 1627/21680 [04:38<15:20:41,  2.75s/it]

{'eval_loss': 0.44297099113464355, 'eval_accuracy': 0.8707250341997264, 'eval_runtime': 8.6666, 'eval_samples_per_second': 168.694, 'eval_steps_per_second': 21.116, 'epoch': 3.0}


 10%|█         | 2168/21680 [06:01<49:47,  6.53it/s]   

{'loss': 0.1589, 'learning_rate': 9e-06, 'epoch': 4.0}


                                                    
 10%|█         | 2169/21680 [06:10<14:55:13,  2.75s/it]

{'eval_loss': 0.6247350573539734, 'eval_accuracy': 0.8632010943912448, 'eval_runtime': 8.6586, 'eval_samples_per_second': 168.849, 'eval_steps_per_second': 21.135, 'epoch': 4.0}


 12%|█▎        | 2710/21680 [07:33<48:29,  6.52it/s]   

{'loss': 0.0905, 'learning_rate': 8.750000000000001e-06, 'epoch': 5.0}


                                                    
 13%|█▎        | 2711/21680 [07:42<14:33:03,  2.76s/it]

{'eval_loss': 0.6862855553627014, 'eval_accuracy': 0.8830369357045144, 'eval_runtime': 8.6863, 'eval_samples_per_second': 168.311, 'eval_steps_per_second': 21.068, 'epoch': 5.0}


 15%|█▌        | 3252/21680 [09:05<47:09,  6.51it/s]   

{'loss': 0.0642, 'learning_rate': 8.5e-06, 'epoch': 6.0}


                                                    
 15%|█▌        | 3253/21680 [09:14<14:07:41,  2.76s/it]

{'eval_loss': 0.8396640419960022, 'eval_accuracy': 0.8618331053351573, 'eval_runtime': 8.6812, 'eval_samples_per_second': 168.41, 'eval_steps_per_second': 21.08, 'epoch': 6.0}


 18%|█▊        | 3794/21680 [10:37<45:43,  6.52it/s]   

{'loss': 0.0464, 'learning_rate': 8.25e-06, 'epoch': 7.0}


                                                    
 18%|█▊        | 3795/21680 [10:46<13:43:22,  2.76s/it]

{'eval_loss': 0.8796049356460571, 'eval_accuracy': 0.8618331053351573, 'eval_runtime': 8.6895, 'eval_samples_per_second': 168.248, 'eval_steps_per_second': 21.06, 'epoch': 7.0}


 20%|██        | 4336/21680 [12:09<44:15,  6.53it/s]   

{'loss': 0.0314, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.0}


                                                    
 20%|██        | 4337/21680 [12:18<13:16:42,  2.76s/it]

{'eval_loss': 0.8876963257789612, 'eval_accuracy': 0.8666210670314638, 'eval_runtime': 8.67, 'eval_samples_per_second': 168.627, 'eval_steps_per_second': 21.107, 'epoch': 8.0}


 22%|██▎       | 4878/21680 [13:41<43:09,  6.49it/s]   

{'loss': 0.0314, 'learning_rate': 7.75e-06, 'epoch': 9.0}


                                                    
 23%|██▎       | 4879/21680 [13:50<12:53:09,  2.76s/it]

{'eval_loss': 0.9566124081611633, 'eval_accuracy': 0.8693570451436389, 'eval_runtime': 8.6859, 'eval_samples_per_second': 168.319, 'eval_steps_per_second': 21.069, 'epoch': 9.0}


 25%|██▌       | 5420/21680 [15:13<41:49,  6.48it/s]   

{'loss': 0.019, 'learning_rate': 7.500000000000001e-06, 'epoch': 10.0}


                                                    
 25%|██▌       | 5421/21680 [15:22<12:28:18,  2.76s/it]

{'eval_loss': 0.9593166708946228, 'eval_accuracy': 0.8803009575923393, 'eval_runtime': 8.6772, 'eval_samples_per_second': 168.488, 'eval_steps_per_second': 21.09, 'epoch': 10.0}


 28%|██▊       | 5962/21680 [16:45<40:35,  6.45it/s]   

{'loss': 0.025, 'learning_rate': 7.25e-06, 'epoch': 11.0}


                                                    
 28%|██▊       | 5963/21680 [16:54<12:03:31,  2.76s/it]

{'eval_loss': 0.9933716058731079, 'eval_accuracy': 0.8830369357045144, 'eval_runtime': 8.6902, 'eval_samples_per_second': 168.236, 'eval_steps_per_second': 21.058, 'epoch': 11.0}


 30%|███       | 6504/21680 [18:17<38:38,  6.54it/s]   

{'loss': 0.0079, 'learning_rate': 7e-06, 'epoch': 12.0}


                                                    
 30%|███       | 6505/21680 [18:26<11:38:44,  2.76s/it]

{'eval_loss': 1.1853338479995728, 'eval_accuracy': 0.8638850889192886, 'eval_runtime': 8.688, 'eval_samples_per_second': 168.279, 'eval_steps_per_second': 21.064, 'epoch': 12.0}


 32%|███▎      | 7046/21680 [19:49<37:16,  6.54it/s]   

{'loss': 0.0145, 'learning_rate': 6.750000000000001e-06, 'epoch': 13.0}


                                                    
 33%|███▎      | 7047/21680 [19:58<11:12:45,  2.76s/it]

{'eval_loss': 1.1797480583190918, 'eval_accuracy': 0.872093023255814, 'eval_runtime': 8.6768, 'eval_samples_per_second': 168.495, 'eval_steps_per_second': 21.091, 'epoch': 13.0}


 35%|███▌      | 7588/21680 [21:21<35:56,  6.53it/s]   

{'loss': 0.0174, 'learning_rate': 6.5000000000000004e-06, 'epoch': 14.0}


                                                    
 35%|███▌      | 7589/21680 [21:30<10:48:09,  2.76s/it]

{'eval_loss': 1.1494791507720947, 'eval_accuracy': 0.8707250341997264, 'eval_runtime': 8.6825, 'eval_samples_per_second': 168.385, 'eval_steps_per_second': 21.077, 'epoch': 14.0}


 38%|███▊      | 8130/21680 [22:53<34:39,  6.52it/s]   

{'loss': 0.0163, 'learning_rate': 6.25e-06, 'epoch': 15.0}


                                                    
 38%|███▊      | 8131/21680 [23:02<10:23:31,  2.76s/it]

{'eval_loss': 1.2482339143753052, 'eval_accuracy': 0.8625170998632011, 'eval_runtime': 8.6871, 'eval_samples_per_second': 168.295, 'eval_steps_per_second': 21.066, 'epoch': 15.0}


 40%|████      | 8672/21680 [24:25<33:47,  6.42it/s]   

{'loss': 0.0188, 'learning_rate': 6e-06, 'epoch': 16.0}


                                                    
 40%|████      | 8673/21680 [24:34<9:59:18,  2.76s/it]

{'eval_loss': 1.1747688055038452, 'eval_accuracy': 0.8577291381668947, 'eval_runtime': 8.6923, 'eval_samples_per_second': 168.195, 'eval_steps_per_second': 21.053, 'epoch': 16.0}


 42%|████▎     | 9214/21680 [25:57<31:44,  6.55it/s]  

{'loss': 0.0078, 'learning_rate': 5.75e-06, 'epoch': 17.0}


                                                    
 43%|████▎     | 9215/21680 [26:06<9:33:21,  2.76s/it]

{'eval_loss': 1.19091796875, 'eval_accuracy': 0.8707250341997264, 'eval_runtime': 8.6819, 'eval_samples_per_second': 168.396, 'eval_steps_per_second': 21.078, 'epoch': 17.0}


 45%|████▌     | 9756/21680 [27:29<30:19,  6.55it/s]  

{'loss': 0.0112, 'learning_rate': 5.500000000000001e-06, 'epoch': 18.0}


                                                    
 45%|████▌     | 9757/21680 [27:38<9:09:09,  2.76s/it]

{'eval_loss': 1.3862990140914917, 'eval_accuracy': 0.8590971272229823, 'eval_runtime': 8.6947, 'eval_samples_per_second': 168.148, 'eval_steps_per_second': 21.047, 'epoch': 18.0}


 48%|████▊     | 10298/21680 [29:01<29:11,  6.50it/s] 

{'loss': 0.0116, 'learning_rate': 5.2500000000000006e-06, 'epoch': 19.0}


                                                     
 48%|████▊     | 10299/21680 [29:10<8:43:47,  2.76s/it]

{'eval_loss': 1.1697202920913696, 'eval_accuracy': 0.884404924760602, 'eval_runtime': 8.6888, 'eval_samples_per_second': 168.263, 'eval_steps_per_second': 21.062, 'epoch': 19.0}


 50%|█████     | 10840/21680 [30:33<27:51,  6.48it/s]  

{'loss': 0.0131, 'learning_rate': 5e-06, 'epoch': 20.0}


                                                     
 50%|█████     | 10841/21680 [30:42<8:19:27,  2.76s/it]

{'eval_loss': 1.1792734861373901, 'eval_accuracy': 0.8803009575923393, 'eval_runtime': 8.692, 'eval_samples_per_second': 168.2, 'eval_steps_per_second': 21.054, 'epoch': 20.0}


 52%|█████▎    | 11382/21680 [32:05<26:17,  6.53it/s]  

{'loss': 0.0144, 'learning_rate': 4.75e-06, 'epoch': 21.0}


                                                     
 53%|█████▎    | 11383/21680 [32:14<7:53:50,  2.76s/it]

{'eval_loss': 1.1735236644744873, 'eval_accuracy': 0.8727770177838577, 'eval_runtime': 8.687, 'eval_samples_per_second': 168.298, 'eval_steps_per_second': 21.066, 'epoch': 21.0}


 55%|█████▌    | 11924/21680 [33:37<25:01,  6.50it/s]  

{'loss': 0.0086, 'learning_rate': 4.5e-06, 'epoch': 22.0}


                                                     
 55%|█████▌    | 11925/21680 [33:46<7:28:43,  2.76s/it]

{'eval_loss': 1.1740590333938599, 'eval_accuracy': 0.86593707250342, 'eval_runtime': 8.683, 'eval_samples_per_second': 168.375, 'eval_steps_per_second': 21.076, 'epoch': 22.0}


 57%|█████▊    | 12466/21680 [35:09<23:29,  6.54it/s]  

{'loss': 0.0029, 'learning_rate': 4.25e-06, 'epoch': 23.0}


                                                     
 58%|█████▊    | 12467/21680 [35:18<7:04:38,  2.77s/it]

{'eval_loss': 1.1482586860656738, 'eval_accuracy': 0.8803009575923393, 'eval_runtime': 8.6938, 'eval_samples_per_second': 168.166, 'eval_steps_per_second': 21.049, 'epoch': 23.0}


 60%|██████    | 13008/21680 [36:41<22:22,  6.46it/s]  

{'loss': 0.0058, 'learning_rate': 4.000000000000001e-06, 'epoch': 24.0}


                                                     
 60%|██████    | 13009/21680 [36:50<6:39:13,  2.76s/it]

{'eval_loss': 1.20418119430542, 'eval_accuracy': 0.8761969904240766, 'eval_runtime': 8.6896, 'eval_samples_per_second': 168.248, 'eval_steps_per_second': 21.06, 'epoch': 24.0}


 62%|██████▎   | 13550/21680 [38:14<21:00,  6.45it/s]  

{'loss': 0.0032, 'learning_rate': 3.7500000000000005e-06, 'epoch': 25.0}


                                                     
 63%|██████▎   | 13551/21680 [38:22<6:14:27,  2.76s/it]

{'eval_loss': 1.2347562313079834, 'eval_accuracy': 0.8741450068399452, 'eval_runtime': 8.6947, 'eval_samples_per_second': 168.148, 'eval_steps_per_second': 21.047, 'epoch': 25.0}


 65%|██████▌   | 14092/21680 [39:46<19:42,  6.42it/s]  

{'loss': 0.0007, 'learning_rate': 3.5e-06, 'epoch': 26.0}


                                                     
 65%|██████▌   | 14093/21680 [39:55<5:49:52,  2.77s/it]

{'eval_loss': 1.327087640762329, 'eval_accuracy': 0.8673050615595075, 'eval_runtime': 8.6969, 'eval_samples_per_second': 168.105, 'eval_steps_per_second': 21.042, 'epoch': 26.0}


 68%|██████▊   | 14634/21680 [41:18<18:01,  6.51it/s]  

{'loss': 0.0047, 'learning_rate': 3.2500000000000002e-06, 'epoch': 27.0}


                                                     
 68%|██████▊   | 14635/21680 [41:27<5:24:16,  2.76s/it]

{'eval_loss': 1.2702598571777344, 'eval_accuracy': 0.8803009575923393, 'eval_runtime': 8.6863, 'eval_samples_per_second': 168.31, 'eval_steps_per_second': 21.068, 'epoch': 27.0}


 70%|███████   | 15176/21680 [42:50<16:39,  6.51it/s]  

{'loss': 0.0038, 'learning_rate': 3e-06, 'epoch': 28.0}


                                                     
 70%|███████   | 15177/21680 [42:59<4:59:04,  2.76s/it]

{'eval_loss': 1.4347540140151978, 'eval_accuracy': 0.8632010943912448, 'eval_runtime': 8.6807, 'eval_samples_per_second': 168.42, 'eval_steps_per_second': 21.081, 'epoch': 28.0}


 72%|███████▎  | 15718/21680 [44:22<15:26,  6.43it/s]  

{'loss': 0.0019, 'learning_rate': 2.7500000000000004e-06, 'epoch': 29.0}


                                                     
 73%|███████▎  | 15719/21680 [44:31<4:34:57,  2.77s/it]

{'eval_loss': 1.2856435775756836, 'eval_accuracy': 0.8789329685362517, 'eval_runtime': 8.7004, 'eval_samples_per_second': 168.038, 'eval_steps_per_second': 21.033, 'epoch': 29.0}


 75%|███████▌  | 16260/21680 [45:54<13:49,  6.53it/s]  

{'loss': 0.0005, 'learning_rate': 2.5e-06, 'epoch': 30.0}


                                                     
 75%|███████▌  | 16261/21680 [46:03<4:08:57,  2.76s/it]

{'eval_loss': 1.2939743995666504, 'eval_accuracy': 0.8809849521203831, 'eval_runtime': 8.6708, 'eval_samples_per_second': 168.613, 'eval_steps_per_second': 21.105, 'epoch': 30.0}


 78%|███████▊  | 16802/21680 [47:26<12:24,  6.55it/s]  

{'loss': 0.0007, 'learning_rate': 2.25e-06, 'epoch': 31.0}


                                                     
 78%|███████▊  | 16803/21680 [47:35<3:44:20,  2.76s/it]

{'eval_loss': 1.3278460502624512, 'eval_accuracy': 0.8782489740082079, 'eval_runtime': 8.6838, 'eval_samples_per_second': 168.36, 'eval_steps_per_second': 21.074, 'epoch': 31.0}


 80%|████████  | 17344/21680 [48:58<11:08,  6.49it/s]  

{'loss': 0.0034, 'learning_rate': 2.0000000000000003e-06, 'epoch': 32.0}


                                                     
 80%|████████  | 17345/21680 [49:07<3:19:40,  2.76s/it]

{'eval_loss': 1.3439669609069824, 'eval_accuracy': 0.8782489740082079, 'eval_runtime': 8.6922, 'eval_samples_per_second': 168.197, 'eval_steps_per_second': 21.053, 'epoch': 32.0}


 82%|████████▎ | 17886/21680 [50:30<09:40,  6.53it/s]  

{'loss': 0.0031, 'learning_rate': 1.75e-06, 'epoch': 33.0}


                                                     
 83%|████████▎ | 17887/21680 [50:39<2:54:43,  2.76s/it]

{'eval_loss': 1.3731420040130615, 'eval_accuracy': 0.8755129958960328, 'eval_runtime': 8.692, 'eval_samples_per_second': 168.2, 'eval_steps_per_second': 21.054, 'epoch': 33.0}


 85%|████████▌ | 18428/21680 [52:02<08:17,  6.54it/s]  

{'loss': 0.0007, 'learning_rate': 1.5e-06, 'epoch': 34.0}


                                                     
 85%|████████▌ | 18429/21680 [52:11<2:29:40,  2.76s/it]

{'eval_loss': 1.3797893524169922, 'eval_accuracy': 0.8796169630642955, 'eval_runtime': 8.6909, 'eval_samples_per_second': 168.222, 'eval_steps_per_second': 21.057, 'epoch': 34.0}


 88%|████████▊ | 18970/21680 [53:34<06:58,  6.48it/s]  

{'loss': 0.0003, 'learning_rate': 1.25e-06, 'epoch': 35.0}


                                                     
 88%|████████▊ | 18971/21680 [53:43<2:04:44,  2.76s/it]

{'eval_loss': 1.402349591255188, 'eval_accuracy': 0.8741450068399452, 'eval_runtime': 8.6885, 'eval_samples_per_second': 168.268, 'eval_steps_per_second': 21.062, 'epoch': 35.0}


 90%|█████████ | 19512/21680 [55:06<05:31,  6.53it/s]  

{'loss': 0.0027, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


                                                     
 90%|█████████ | 19513/21680 [55:15<1:39:43,  2.76s/it]

{'eval_loss': 1.38118577003479, 'eval_accuracy': 0.8782489740082079, 'eval_runtime': 8.6864, 'eval_samples_per_second': 168.309, 'eval_steps_per_second': 21.067, 'epoch': 36.0}


 92%|█████████▎| 20054/21680 [56:38<04:10,  6.50it/s]  

{'loss': 0.0046, 'learning_rate': 7.5e-07, 'epoch': 37.0}


                                                     
 93%|█████████▎| 20055/21680 [56:47<1:14:56,  2.77s/it]

{'eval_loss': 1.3970526456832886, 'eval_accuracy': 0.8761969904240766, 'eval_runtime': 8.6974, 'eval_samples_per_second': 168.097, 'eval_steps_per_second': 21.041, 'epoch': 37.0}


 95%|█████████▌| 20596/21680 [58:11<02:47,  6.49it/s]  

{'loss': 0.0002, 'learning_rate': 5.000000000000001e-07, 'epoch': 38.0}


                                                     
 95%|█████████▌| 20597/21680 [58:19<49:51,  2.76s/it]

{'eval_loss': 1.3617817163467407, 'eval_accuracy': 0.8837209302325582, 'eval_runtime': 8.6869, 'eval_samples_per_second': 168.299, 'eval_steps_per_second': 21.066, 'epoch': 38.0}


 98%|█████████▊| 21138/21680 [59:43<01:23,  6.49it/s]

{'loss': 0.0002, 'learning_rate': 2.5000000000000004e-07, 'epoch': 39.0}


                                                     
 98%|█████████▊| 21139/21680 [59:51<24:53,  2.76s/it]

{'eval_loss': 1.3860785961151123, 'eval_accuracy': 0.8775649794801642, 'eval_runtime': 8.6827, 'eval_samples_per_second': 168.38, 'eval_steps_per_second': 21.076, 'epoch': 39.0}


100%|██████████| 21680/21680 [1:01:15<00:00,  6.43it/s]

{'loss': 0.0003, 'learning_rate': 0.0, 'epoch': 40.0}


                                                       
100%|██████████| 21680/21680 [1:01:24<00:00,  5.88it/s]

{'eval_loss': 1.4360506534576416, 'eval_accuracy': 0.8727770177838577, 'eval_runtime': 8.6869, 'eval_samples_per_second': 168.3, 'eval_steps_per_second': 21.066, 'epoch': 40.0}
{'train_runtime': 3684.6781, 'train_samples_per_second': 47.071, 'train_steps_per_second': 5.884, 'train_loss': 0.04240683730437362, 'epoch': 40.0}
Execution Time : 3685 seconds





In [16]:
# Evaluate the trained model on the Trainer's `eval_dataset`, i.e. the
# `test_dataset` built from the rows marked 'validation'. The returned metrics
# dict is displayed as the cell output.
trainer.evaluate()

100%|██████████| 183/183 [00:08<00:00, 21.13it/s]


{'eval_loss': 1.4360506534576416,
 'eval_accuracy': 0.8727770177838577,
 'eval_runtime': 8.7105,
 'eval_samples_per_second': 167.844,
 'eval_steps_per_second': 21.009,
 'epoch': 40.0}