In [1]:
import pandas as pd

# Name of this fine-tuning run; later used as the sub-directory of
# ../../data/models/ that receives the Trainer output.
finetuned_dirname = "40-epoch-bert-base-finetuned-phemernr2-rnr"

# Load the tweet dataset. The 'tvt2' column is what the later cells use to
# select the training and validation rows.
data = pd.read_csv("../../data/phemernr2_dataset_with_tvt.csv", sep=",")
# Disabled alternative column selection, kept for reference:
# data = data[['tweet_text', 'tvt2', 'label2']]
# data.columns = ['tweet_text', 'tvt2', 'label']
print(data.shape)  # (rows, columns)
data.head()

(6425, 5)


Unnamed: 0,tweet_id,tweet_text,label,tvt2,tvt2_1
0,552833795142209536,the east london mosque would like to offer its...,non-rumours,training,training
1,580318210609696769,breaking - a germanwings airbus a320 plane rep...,true,validation,testting
2,552798891994009601,reports that two of the dead in the #charliehe...,true,training,training
3,576790814942236672,after #putin disappeared russian tv no longer ...,non-rumours,validation,training
4,499678822598340608,saw #ferguson for myself. #justiceformichaelbr...,non-rumours,testting,testting


In [2]:
# Collapse the listed label values into the single class 'rumors'; any value
# not listed here is left unchanged.
# NOTE(review): 'unverfied' looks like a misspelling of 'unverified' - confirm
# against the label values actually present in the CSV.
data['label'] = data['label'].replace(['true', 'unverfied', 'false'], 'rumors')

In [3]:
# Second name for the same DataFrame object (no copy is made), so changes made
# through either name are visible through the other.
combined_data = data

In [4]:
import torch

class CustomTextDataset(torch.utils.data.dataset.Dataset):
    """Map-style dataset pairing raw texts with labels and cached encodings.

    Until :meth:`tokenize` has been called, the ``attention_mask``,
    ``input_ids`` and ``token_type_ids`` entries of every sample are ``None``.
    """

    def __init__(self, texts, labels):
        """Store the parallel sequences of texts and labels.

        Args:
            texts: Indexable sequence of input strings.
            labels: Indexable sequence of labels, aligned with ``texts``.
        """
        self.labels = labels
        self.texts = texts
        # Filled in by tokenize(); None means "not tokenized yet".
        self.attention_mask = None
        self.input_ids = None
        self.token_type_ids = None

    def __len__(self):
        """Return the number of samples (taken from the labels sequence)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict.

        Keys are ``text``, ``label``, ``attention_mask``, ``input_ids`` and
        ``token_type_ids``; an encoding key is ``None`` when that encoding
        has not been stored.
        """
        sample = {
            "text": self.texts[idx],
            "label": self.labels[idx],
            "attention_mask": self.attention_mask[idx] if self.attention_mask else None,
            "input_ids": self.input_ids[idx] if self.input_ids else None,
            "token_type_ids": self.token_type_ids[idx] if self.token_type_ids else None
        }
        return sample

    def tokenize(self, tokenizer):
        """Tokenize every text and cache the encodings on the dataset.

        Args:
            tokenizer: Callable invoked as
                ``tokenizer(text, padding="max_length", truncation=True)``.
                It must return a mapping containing ``'attention_mask'`` and
                ``'input_ids'``; ``'token_type_ids'`` is optional.
        """
        attention_mask = []
        input_ids = []
        token_type_ids = []

        for text in self.texts:
            token = tokenizer(text, padding="max_length", truncation=True)

            attention_mask.append(token['attention_mask'])
            input_ids.append(token['input_ids'])
            # Some tokenizers do not emit segment ids; tolerate their absence
            # instead of failing with a KeyError.
            if 'token_type_ids' in token:
                token_type_ids.append(token['token_type_ids'])

        # Publish only after the whole pass succeeded, so an exception raised
        # mid-way never leaves the dataset with partially filled encodings.
        self.attention_mask = attention_mask
        self.input_ids = input_ids
        # Keep segment ids only when every text produced them.
        self.token_type_ids = token_type_ids if len(token_type_ids) == len(input_ids) else None

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Distinct label strings present in the frame.
labels_str = combined_data['label'].unique().tolist()

# Binary target, one entry per row in frame order: 1 when the label is exactly
# "non-rumours", 0 for every other value.
labels = [
    1 if label_value == "non-rumours" else 0
    for label_value in combined_data['label']
]

print(len(labels))
labels[:10]

6425


[1, 0, 0, 1, 1, 1, 1, 0, 1, 1]

In [6]:
def _rows_for_split(split_name):
    """Return ``(texts, split_labels)`` for rows whose 'tvt2' equals ``split_name``.

    Rows are paired with ``labels`` by position (row order), so the result is
    correct whatever index the DataFrame carries, and the frame is walked
    once per split instead of twice.
    """
    selected = [
        (text, lab)
        for text, lab, split in zip(combined_data['tweet_text'], labels, combined_data['tvt2'])
        if split == split_name
    ]
    return [text for text, _ in selected], [lab for _, lab in selected]


train_dataset = CustomTextDataset(*_rows_for_split('training'))
# Despite its name, this dataset holds the rows marked 'validation'.
test_dataset = CustomTextDataset(*_rows_for_split('validation'))
train_dataset[0]

{'text': 'the east london mosque would like to offer its sincere condolences to the families of those killed during the #charliehebdo attacks (1/2)',
 'label': 1,
 'attention_mask': None,
 'input_ids': None,
 'token_type_ids': None}

In [7]:
from transformers import AutoTokenizer

# Load the tokenizer published under the "bert-base-cased" checkpoint name.
# NOTE(review): the tweets shown by data.head() appear to be lower-cased while
# this checkpoint is a cased one - confirm that the pairing is intended.
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

In [8]:
# inputs = tokenizer(["you're stuck in a timewrap from 2004 though", "summa lumma dumma lumma"], padding="max_length", truncation=True)
# for k,v in inputs.items():
#     print(k)

In [9]:
def tokenize_function(examples):
    """Tokenize ``examples["text"]`` with the notebook-level ``tokenizer``.

    The call uses ``padding="max_length"`` and ``truncation=True``. This
    helper is not called anywhere in the visible part of the notebook.
    """
    texts = examples["text"]
    return tokenizer(texts, padding="max_length", truncation=True)


# Cache the encodings on both splits, training split first.
for _split_dataset in (train_dataset, test_dataset):
    _split_dataset.tokenize(tokenizer)

In [10]:
# Report the number of samples in each split, one per line (training first).
for _sized_dataset in (train_dataset, test_dataset):
    print(len(_sized_dataset))

4336
1462


### Fine Tuning

In [11]:
from transformers import AutoModelForSequenceClassification

# Load "bert-base-cased" with a sequence-classification head for two classes.
# The load message in the cell output reports that the classifier weights are
# newly initialized, i.e. they are not taken from the checkpoint.
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased",
                                                           output_hidden_states=False,
                                                           num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
from transformers import TrainingArguments

import math

epochs = 40
batch_size = 8
# Number of batches in the whole run: ceil(samples / batch_size) per epoch,
# times the number of epochs. math.ceil replaces the former
# round(x + 0.49), which under-counts when the fractional part of x is 0.01
# or smaller.
# NOTE(review): assuming one device, no gradient accumulation and the default
# step-based save strategy, the first checkpoint lands on the final step.
save_steps = math.ceil(len(train_dataset) / batch_size) * epochs
# save_steps = 1_000_000

# logging_steps is intentionally not passed: with logging_strategy="epoch"
# logging happens once per epoch and a step interval has no effect.
training_args = TrainingArguments(
    output_dir=f"../../data/models/{finetuned_dirname}",
    num_train_epochs=epochs,
    save_steps=save_steps,
    learning_rate=1e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",
    logging_strategy="epoch",
)

print(f"Save Steps : {save_steps}")

Save Steps : 21680


In [13]:
import numpy as np
from datasets import load_metric

# Accuracy metric object obtained through `datasets.load_metric`.
# NOTE(review): the cell output echoes this line, which looks like the tail of
# a warning (possibly a deprecation notice for load_metric) - confirm against
# the installed `datasets` version.
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Compute classification accuracy for one evaluation pass.

    Accuracy is computed with NumPy directly, so the function does not depend
    on a metric object created elsewhere in the notebook.

    Args:
        eval_pred: Pair ``(logits, labels)``. ``logits`` holds one score per
            class along its last axis; ``labels`` holds the reference class
            ids, one per prediction.

    Returns:
        dict: ``{"accuracy": value}`` where ``value`` is the fraction of
        predictions equal to their reference label, as a Python float.
    """
    logits, labels = eval_pred
    # Predicted class = position of the highest score along the last axis.
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": float(np.mean(predictions == np.asarray(labels)))}

  metric = load_metric("accuracy")


In [14]:
from transformers import Trainer

# Bundle the model, the training configuration, both datasets and the metric
# callback into a Trainer.
# `test_dataset` was built from the rows marked 'validation', so the per-epoch
# evaluation reported during training runs on that split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

In [15]:
import time

# Time the complete fine-tuning run with the wall clock.
start = time.time()
trainer.train()
elapsed_seconds = round(time.time() - start)

print(f"Execution Time : {elapsed_seconds} seconds")

  2%|▎         | 542/21680 [02:44<1:45:27,  3.34it/s]

{'loss': 0.4768, 'learning_rate': 9.75e-06, 'epoch': 1.0}


                                                     
  2%|▎         | 542/21680 [03:01<1:45:27,  3.34it/s]

{'eval_loss': 0.41069549322128296, 'eval_accuracy': 0.83515731874145, 'eval_runtime': 17.1467, 'eval_samples_per_second': 85.264, 'eval_steps_per_second': 10.673, 'epoch': 1.0}


  5%|▌         | 1084/21680 [05:44<1:43:04,  3.33it/s]

{'loss': 0.3356, 'learning_rate': 9.5e-06, 'epoch': 2.0}


                                                      
  5%|▌         | 1084/21680 [06:01<1:43:04,  3.33it/s]

{'eval_loss': 0.3581829369068146, 'eval_accuracy': 0.8638850889192886, 'eval_runtime': 17.1064, 'eval_samples_per_second': 85.465, 'eval_steps_per_second': 10.698, 'epoch': 2.0}


  8%|▊         | 1626/21680 [08:44<1:40:01,  3.34it/s] 

{'loss': 0.2241, 'learning_rate': 9.250000000000001e-06, 'epoch': 3.0}


                                                      
  8%|▊         | 1626/21680 [09:02<1:40:01,  3.34it/s]

{'eval_loss': 0.5396726727485657, 'eval_accuracy': 0.8775649794801642, 'eval_runtime': 17.1213, 'eval_samples_per_second': 85.391, 'eval_steps_per_second': 10.688, 'epoch': 3.0}


 10%|█         | 2168/21680 [11:45<1:37:47,  3.33it/s] 

{'loss': 0.1439, 'learning_rate': 9e-06, 'epoch': 4.0}


                                                      
 10%|█         | 2168/21680 [12:02<1:37:47,  3.33it/s]

{'eval_loss': 0.6971474885940552, 'eval_accuracy': 0.872093023255814, 'eval_runtime': 17.1531, 'eval_samples_per_second': 85.232, 'eval_steps_per_second': 10.669, 'epoch': 4.0}


 12%|█▎        | 2710/21680 [14:45<1:34:48,  3.33it/s] 

{'loss': 0.0714, 'learning_rate': 8.750000000000001e-06, 'epoch': 5.0}


                                                      
 12%|█▎        | 2710/21680 [15:02<1:34:48,  3.33it/s]

{'eval_loss': 0.7954745888710022, 'eval_accuracy': 0.8768809849521204, 'eval_runtime': 17.1109, 'eval_samples_per_second': 85.442, 'eval_steps_per_second': 10.695, 'epoch': 5.0}


 15%|█▌        | 3252/21680 [17:45<1:31:58,  3.34it/s] 

{'loss': 0.0519, 'learning_rate': 8.5e-06, 'epoch': 6.0}


                                                      
 15%|█▌        | 3252/21680 [18:03<1:31:58,  3.34it/s]

{'eval_loss': 0.8596755862236023, 'eval_accuracy': 0.8755129958960328, 'eval_runtime': 17.1481, 'eval_samples_per_second': 85.257, 'eval_steps_per_second': 10.672, 'epoch': 6.0}


 18%|█▊        | 3794/21680 [20:46<1:30:18,  3.30it/s] 

{'loss': 0.0341, 'learning_rate': 8.25e-06, 'epoch': 7.0}


                                                      
 18%|█▊        | 3794/21680 [21:03<1:30:18,  3.30it/s]

{'eval_loss': 0.93038409948349, 'eval_accuracy': 0.8686730506155951, 'eval_runtime': 17.166, 'eval_samples_per_second': 85.168, 'eval_steps_per_second': 10.661, 'epoch': 7.0}


 20%|██        | 4336/21680 [23:46<1:26:35,  3.34it/s] 

{'loss': 0.0272, 'learning_rate': 8.000000000000001e-06, 'epoch': 8.0}


                                                      
 20%|██        | 4336/21680 [24:03<1:26:35,  3.34it/s]

{'eval_loss': 1.1327303647994995, 'eval_accuracy': 0.8625170998632011, 'eval_runtime': 17.1096, 'eval_samples_per_second': 85.449, 'eval_steps_per_second': 10.696, 'epoch': 8.0}


 22%|██▎       | 4878/21680 [26:47<1:24:31,  3.31it/s] 

{'loss': 0.0194, 'learning_rate': 7.75e-06, 'epoch': 9.0}


                                                      
 22%|██▎       | 4878/21680 [27:04<1:24:31,  3.31it/s]

{'eval_loss': 1.0322232246398926, 'eval_accuracy': 0.8727770177838577, 'eval_runtime': 17.1705, 'eval_samples_per_second': 85.146, 'eval_steps_per_second': 10.658, 'epoch': 9.0}


 25%|██▌       | 5420/21680 [29:47<1:21:34,  3.32it/s] 

{'loss': 0.019, 'learning_rate': 7.500000000000001e-06, 'epoch': 10.0}


                                                      
 25%|██▌       | 5420/21680 [30:04<1:21:34,  3.32it/s]

{'eval_loss': 1.270407795906067, 'eval_accuracy': 0.853625170998632, 'eval_runtime': 17.1324, 'eval_samples_per_second': 85.335, 'eval_steps_per_second': 10.682, 'epoch': 10.0}


 28%|██▊       | 5962/21680 [32:48<1:18:53,  3.32it/s] 

{'loss': 0.0305, 'learning_rate': 7.25e-06, 'epoch': 11.0}


                                                      
 28%|██▊       | 5962/21680 [33:05<1:18:53,  3.32it/s]

{'eval_loss': 1.0769760608673096, 'eval_accuracy': 0.8686730506155951, 'eval_runtime': 17.1133, 'eval_samples_per_second': 85.43, 'eval_steps_per_second': 10.693, 'epoch': 11.0}


 30%|███       | 6504/21680 [35:48<1:16:02,  3.33it/s] 

{'loss': 0.0178, 'learning_rate': 7e-06, 'epoch': 12.0}


                                                      
 30%|███       | 6504/21680 [36:05<1:16:02,  3.33it/s]

{'eval_loss': 1.073604941368103, 'eval_accuracy': 0.8782489740082079, 'eval_runtime': 17.1544, 'eval_samples_per_second': 85.226, 'eval_steps_per_second': 10.668, 'epoch': 12.0}


 32%|███▎      | 7046/21680 [38:48<1:13:08,  3.33it/s] 

{'loss': 0.0249, 'learning_rate': 6.750000000000001e-06, 'epoch': 13.0}


                                                      
 32%|███▎      | 7046/21680 [39:05<1:13:08,  3.33it/s]

{'eval_loss': 1.2664821147918701, 'eval_accuracy': 0.8590971272229823, 'eval_runtime': 17.1127, 'eval_samples_per_second': 85.434, 'eval_steps_per_second': 10.694, 'epoch': 13.0}


 35%|███▌      | 7588/21680 [41:48<1:10:22,  3.34it/s] 

{'loss': 0.0153, 'learning_rate': 6.5000000000000004e-06, 'epoch': 14.0}


                                                      
 35%|███▌      | 7588/21680 [42:05<1:10:22,  3.34it/s]

{'eval_loss': 1.2438197135925293, 'eval_accuracy': 0.8638850889192886, 'eval_runtime': 17.0949, 'eval_samples_per_second': 85.523, 'eval_steps_per_second': 10.705, 'epoch': 14.0}


 38%|███▊      | 8130/21680 [44:49<1:07:40,  3.34it/s] 

{'loss': 0.018, 'learning_rate': 6.25e-06, 'epoch': 15.0}


                                                      
 38%|███▊      | 8130/21680 [45:06<1:07:40,  3.34it/s]

{'eval_loss': 1.2162106037139893, 'eval_accuracy': 0.872093023255814, 'eval_runtime': 17.1165, 'eval_samples_per_second': 85.415, 'eval_steps_per_second': 10.691, 'epoch': 15.0}


 40%|████      | 8672/21680 [47:49<1:05:02,  3.33it/s] 

{'loss': 0.012, 'learning_rate': 6e-06, 'epoch': 16.0}


                                                      
 40%|████      | 8672/21680 [48:06<1:05:02,  3.33it/s]

{'eval_loss': 1.2641531229019165, 'eval_accuracy': 0.8652530779753762, 'eval_runtime': 17.1026, 'eval_samples_per_second': 85.484, 'eval_steps_per_second': 10.7, 'epoch': 16.0}


 42%|████▎     | 9214/21680 [50:49<1:02:25,  3.33it/s] 

{'loss': 0.013, 'learning_rate': 5.75e-06, 'epoch': 17.0}


                                                      
 42%|████▎     | 9214/21680 [51:06<1:02:25,  3.33it/s]

{'eval_loss': 1.1277883052825928, 'eval_accuracy': 0.874829001367989, 'eval_runtime': 17.1635, 'eval_samples_per_second': 85.181, 'eval_steps_per_second': 10.662, 'epoch': 17.0}


 45%|████▌     | 9756/21680 [53:50<59:43,  3.33it/s]   

{'loss': 0.0121, 'learning_rate': 5.500000000000001e-06, 'epoch': 18.0}


                                                    
 45%|████▌     | 9756/21680 [54:07<59:43,  3.33it/s]

{'eval_loss': 1.4156922101974487, 'eval_accuracy': 0.8543091655266758, 'eval_runtime': 17.1175, 'eval_samples_per_second': 85.41, 'eval_steps_per_second': 10.691, 'epoch': 18.0}


 48%|████▊     | 10298/21680 [56:50<56:58,  3.33it/s]  

{'loss': 0.0153, 'learning_rate': 5.2500000000000006e-06, 'epoch': 19.0}


                                                     
 48%|████▊     | 10298/21680 [57:07<56:58,  3.33it/s]

{'eval_loss': 1.2910035848617554, 'eval_accuracy': 0.8700410396716827, 'eval_runtime': 17.1824, 'eval_samples_per_second': 85.087, 'eval_steps_per_second': 10.65, 'epoch': 19.0}


 50%|█████     | 10840/21680 [59:50<54:08,  3.34it/s]   

{'loss': 0.0021, 'learning_rate': 5e-06, 'epoch': 20.0}


                                                     
 50%|█████     | 10840/21680 [1:00:07<54:08,  3.34it/s]

{'eval_loss': 1.5030103921890259, 'eval_accuracy': 0.8604651162790697, 'eval_runtime': 17.1616, 'eval_samples_per_second': 85.19, 'eval_steps_per_second': 10.663, 'epoch': 20.0}


 52%|█████▎    | 11382/21680 [1:02:51<51:48,  3.31it/s]   

{'loss': 0.0089, 'learning_rate': 4.75e-06, 'epoch': 21.0}


                                                       
 52%|█████▎    | 11382/21680 [1:03:08<51:48,  3.31it/s]

{'eval_loss': 1.5217865705490112, 'eval_accuracy': 0.8392612859097127, 'eval_runtime': 17.1162, 'eval_samples_per_second': 85.416, 'eval_steps_per_second': 10.692, 'epoch': 21.0}


 55%|█████▌    | 11924/21680 [1:05:51<48:50,  3.33it/s]   

{'loss': 0.0084, 'learning_rate': 4.5e-06, 'epoch': 22.0}


                                                       
 55%|█████▌    | 11924/21680 [1:06:08<48:50,  3.33it/s]

{'eval_loss': 1.2972973585128784, 'eval_accuracy': 0.8700410396716827, 'eval_runtime': 17.1637, 'eval_samples_per_second': 85.18, 'eval_steps_per_second': 10.662, 'epoch': 22.0}


 57%|█████▊    | 12466/21680 [1:08:51<46:11,  3.32it/s]   

{'loss': 0.0048, 'learning_rate': 4.25e-06, 'epoch': 23.0}


                                                       
 57%|█████▊    | 12466/21680 [1:09:09<46:11,  3.32it/s]

{'eval_loss': 1.3477013111114502, 'eval_accuracy': 0.8693570451436389, 'eval_runtime': 17.1444, 'eval_samples_per_second': 85.275, 'eval_steps_per_second': 10.674, 'epoch': 23.0}


 60%|██████    | 13008/21680 [1:11:52<43:25,  3.33it/s]   

{'loss': 0.0063, 'learning_rate': 4.000000000000001e-06, 'epoch': 24.0}


                                                       
 60%|██████    | 13008/21680 [1:12:09<43:25,  3.33it/s]

{'eval_loss': 1.2591520547866821, 'eval_accuracy': 0.8741450068399452, 'eval_runtime': 17.1117, 'eval_samples_per_second': 85.439, 'eval_steps_per_second': 10.694, 'epoch': 24.0}


 62%|██████▎   | 13550/21680 [1:14:52<40:40,  3.33it/s]   

{'loss': 0.0069, 'learning_rate': 3.7500000000000005e-06, 'epoch': 25.0}


                                                       
 62%|██████▎   | 13550/21680 [1:15:09<40:40,  3.33it/s]

{'eval_loss': 1.5960264205932617, 'eval_accuracy': 0.8508891928864569, 'eval_runtime': 17.1715, 'eval_samples_per_second': 85.141, 'eval_steps_per_second': 10.657, 'epoch': 25.0}


 65%|██████▌   | 14092/21680 [1:17:53<38:03,  3.32it/s]   

{'loss': 0.0026, 'learning_rate': 3.5e-06, 'epoch': 26.0}


                                                       
 65%|██████▌   | 14092/21680 [1:18:10<38:03,  3.32it/s]

{'eval_loss': 1.8100448846817017, 'eval_accuracy': 0.8331053351573188, 'eval_runtime': 17.1181, 'eval_samples_per_second': 85.407, 'eval_steps_per_second': 10.69, 'epoch': 26.0}


 68%|██████▊   | 14634/21680 [1:20:53<35:29,  3.31it/s]   

{'loss': 0.0054, 'learning_rate': 3.2500000000000002e-06, 'epoch': 27.0}


                                                       
 68%|██████▊   | 14634/21680 [1:21:10<35:29,  3.31it/s]

{'eval_loss': 1.405750036239624, 'eval_accuracy': 0.8734610123119015, 'eval_runtime': 17.1176, 'eval_samples_per_second': 85.409, 'eval_steps_per_second': 10.691, 'epoch': 27.0}


 70%|███████   | 15176/21680 [1:23:53<32:32,  3.33it/s]   

{'loss': 0.0061, 'learning_rate': 3e-06, 'epoch': 28.0}


                                                       
 70%|███████   | 15176/21680 [1:24:10<32:32,  3.33it/s]

{'eval_loss': 1.3944963216781616, 'eval_accuracy': 0.8693570451436389, 'eval_runtime': 17.1155, 'eval_samples_per_second': 85.42, 'eval_steps_per_second': 10.692, 'epoch': 28.0}


 72%|███████▎  | 15718/21680 [1:26:54<30:02,  3.31it/s]  

{'loss': 0.0036, 'learning_rate': 2.7500000000000004e-06, 'epoch': 29.0}


                                                       
 72%|███████▎  | 15718/21680 [1:27:11<30:02,  3.31it/s]

{'eval_loss': 1.3859204053878784, 'eval_accuracy': 0.8673050615595075, 'eval_runtime': 17.1198, 'eval_samples_per_second': 85.398, 'eval_steps_per_second': 10.689, 'epoch': 29.0}


 75%|███████▌  | 16260/21680 [1:29:54<27:06,  3.33it/s]  

{'loss': 0.0016, 'learning_rate': 2.5e-06, 'epoch': 30.0}


                                                       
 75%|███████▌  | 16260/21680 [1:30:11<27:06,  3.33it/s]

{'eval_loss': 1.475977897644043, 'eval_accuracy': 0.8679890560875513, 'eval_runtime': 17.1687, 'eval_samples_per_second': 85.155, 'eval_steps_per_second': 10.659, 'epoch': 30.0}


 78%|███████▊  | 16802/21680 [1:32:55<24:27,  3.32it/s]  

{'loss': 0.0033, 'learning_rate': 2.25e-06, 'epoch': 31.0}


                                                       
 78%|███████▊  | 16802/21680 [1:33:12<24:27,  3.32it/s]

{'eval_loss': 1.500679612159729, 'eval_accuracy': 0.8673050615595075, 'eval_runtime': 17.1139, 'eval_samples_per_second': 85.428, 'eval_steps_per_second': 10.693, 'epoch': 31.0}


 80%|████████  | 17344/21680 [1:35:55<21:43,  3.33it/s]  

{'loss': 0.0022, 'learning_rate': 2.0000000000000003e-06, 'epoch': 32.0}


                                                       
 80%|████████  | 17344/21680 [1:36:12<21:43,  3.33it/s]

{'eval_loss': 1.4629710912704468, 'eval_accuracy': 0.8693570451436389, 'eval_runtime': 17.1654, 'eval_samples_per_second': 85.171, 'eval_steps_per_second': 10.661, 'epoch': 32.0}


 82%|████████▎ | 17886/21680 [1:38:55<19:13,  3.29it/s]  

{'loss': 0.0, 'learning_rate': 1.75e-06, 'epoch': 33.0}


                                                       
 82%|████████▎ | 17886/21680 [1:39:13<19:13,  3.29it/s]

{'eval_loss': 1.4791829586029053, 'eval_accuracy': 0.8734610123119015, 'eval_runtime': 17.1694, 'eval_samples_per_second': 85.151, 'eval_steps_per_second': 10.658, 'epoch': 33.0}


 85%|████████▌ | 18428/21680 [1:41:56<16:17,  3.33it/s]  

{'loss': 0.0002, 'learning_rate': 1.5e-06, 'epoch': 34.0}


                                                       
 85%|████████▌ | 18428/21680 [1:42:13<16:17,  3.33it/s]

{'eval_loss': 1.5402127504348755, 'eval_accuracy': 0.8700410396716827, 'eval_runtime': 17.1249, 'eval_samples_per_second': 85.373, 'eval_steps_per_second': 10.686, 'epoch': 34.0}


 88%|████████▊ | 18970/21680 [1:44:56<13:33,  3.33it/s]  

{'loss': 0.0, 'learning_rate': 1.25e-06, 'epoch': 35.0}


                                                       
 88%|████████▊ | 18970/21680 [1:45:13<13:33,  3.33it/s]

{'eval_loss': 1.5355005264282227, 'eval_accuracy': 0.8707250341997264, 'eval_runtime': 17.1737, 'eval_samples_per_second': 85.13, 'eval_steps_per_second': 10.656, 'epoch': 35.0}


 90%|█████████ | 19512/21680 [1:47:56<10:50,  3.33it/s]  

{'loss': 0.0051, 'learning_rate': 1.0000000000000002e-06, 'epoch': 36.0}


 90%|█████████ | 19512/21680 [1:48:13<10:50,  3.33it/s]

{'eval_loss': 1.4561498165130615, 'eval_accuracy': 0.8761969904240766, 'eval_runtime': 17.1472, 'eval_samples_per_second': 85.262, 'eval_steps_per_second': 10.672, 'epoch': 36.0}


 92%|█████████▎| 20054/21680 [1:50:57<08:10,  3.31it/s]  

{'loss': 0.0001, 'learning_rate': 7.5e-07, 'epoch': 37.0}


                                                       
 92%|█████████▎| 20054/21680 [1:51:14<08:10,  3.31it/s]

{'eval_loss': 1.462721347808838, 'eval_accuracy': 0.8768809849521204, 'eval_runtime': 17.1136, 'eval_samples_per_second': 85.429, 'eval_steps_per_second': 10.693, 'epoch': 37.0}


 95%|█████████▌| 20596/21680 [1:53:57<05:26,  3.32it/s]  

{'loss': 0.0, 'learning_rate': 5.000000000000001e-07, 'epoch': 38.0}


                                                       
 95%|█████████▌| 20596/21680 [1:54:14<05:26,  3.32it/s]

{'eval_loss': 1.4743260145187378, 'eval_accuracy': 0.8761969904240766, 'eval_runtime': 17.1748, 'eval_samples_per_second': 85.125, 'eval_steps_per_second': 10.655, 'epoch': 38.0}


 98%|█████████▊| 21138/21680 [1:56:57<02:45,  3.28it/s]  

{'loss': 0.0, 'learning_rate': 2.5000000000000004e-07, 'epoch': 39.0}


                                                       
 98%|█████████▊| 21138/21680 [1:57:15<02:45,  3.28it/s]

{'eval_loss': 1.4818512201309204, 'eval_accuracy': 0.8775649794801642, 'eval_runtime': 17.1128, 'eval_samples_per_second': 85.433, 'eval_steps_per_second': 10.694, 'epoch': 39.0}


100%|██████████| 21680/21680 [1:59:59<00:00,  3.31it/s]

{'loss': 0.0022, 'learning_rate': 0.0, 'epoch': 40.0}


                                                       
100%|██████████| 21680/21680 [2:00:16<00:00,  3.00it/s]

{'eval_loss': 1.4888927936553955, 'eval_accuracy': 0.874829001367989, 'eval_runtime': 17.1304, 'eval_samples_per_second': 85.345, 'eval_steps_per_second': 10.683, 'epoch': 40.0}
{'train_runtime': 7216.4628, 'train_samples_per_second': 24.034, 'train_steps_per_second': 3.004, 'train_loss': 0.040799331205383794, 'epoch': 40.0}
Execution Time : 7217 seconds





In [16]:
# Run one more evaluation pass after training; the returned metrics dict is
# displayed as the cell output. Its eval_loss and eval_accuracy equal the
# values logged at epoch 40 during training.
trainer.evaluate()

100%|██████████| 183/183 [00:16<00:00, 10.79it/s]


{'eval_loss': 1.4888927936553955,
 'eval_accuracy': 0.874829001367989,
 'eval_runtime': 17.0515,
 'eval_samples_per_second': 85.74,
 'eval_steps_per_second': 10.732,
 'epoch': 40.0}