In [1]:
'''
fine-tune CodeBERT
Author: Liu Jin Cheng
'''
from transformers import GPT2ForSequenceClassification,GPT2Tokenizer,get_linear_schedule_with_warmup
from datasets import load_dataset,Dataset
import os
import random
import numpy as np
import evaluate
import torch
import argparse
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.functional import F
from torch.cuda.amp import autocast as autocast,GradScaler
import pandas as pd
from sklearn.metrics import f1_score
from torch.optim import AdamW
import pandas as pd


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data_path = os.path.join("..","preprocess","dataset")
train_batch_size = 4
eval_batch_size = 2
lr = 7e-5
num_epochs = 30
model_name = "../../gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name,bos_token = "<|startoftext|>",eos_token = "<|endoftext|>",pad_token = "<|pad|>",cls_token = "<|cls|>",sep_token = "<|sep|>" ,model_max_length = 1024)
model = GPT2ForSequenceClassification.from_pretrained(model_name,num_labels = 66)
model.resize_token_embeddings(len(tokenizer))

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at ../../gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Embedding(50261, 768)

In [3]:

def compute_metrics(eval_pred):
        metirc = evaluate.load("accuracy")
        logits , labels = eval_pred
        predictions = np.argmax(logits,axis=-1)
        return metirc.compute(predictions=predictions,references=labels)


def tokenize_function(examples):
        return tokenizer(examples["text"],truncation = True,padding=True) 

def collate_fn(examples):
    return tokenizer.pad(examples, padding="max_length", return_tensors="pt")

In [4]:
dftrain = pd.read_pickle(os.path.join(data_path,"train.pkl"))
dfvalid = pd.read_pickle(os.path.join(data_path,"valid.pkl"))
traindatasets = Dataset.from_pandas(dftrain)
validdatasets = Dataset.from_pandas(dfvalid)

In [5]:
train_tokenized_dataset = traindatasets.map(tokenize_function,batched=True,remove_columns=["text","__index_level_0__"]).rename_column("label","labels")
valid_tokenized_dataset = validdatasets.map(tokenize_function,batched=True,remove_columns=["text","__index_level_0__"]).rename_column("label","labels")
train_dataloader = DataLoader(train_tokenized_dataset,shuffle=True,collate_fn=collate_fn,batch_size = train_batch_size)
eval_dataloader = DataLoader(valid_tokenized_dataset , collate_fn=collate_fn,batch_size = eval_batch_size)
model.resize_token_embeddings(len(tokenizer))
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

                                                                  

GPT2ForSequenceClassification(
  (transformer): GPT2Model(
    (wte): Embedding(50261, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (score): Linear(in_features=768, out_features=66, bias=False)
)

In [6]:
optimizer = AdamW(params=model.parameters(), lr=lr)

# Instantiate scheduler

lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0.06 * (len(train_dataloader) * num_epochs),
    num_training_steps=(len(train_dataloader) * num_epochs),
)


In [7]:
model.to(device)
max_eval_acc = 0
iter_to_accumlate = 4
epochloss = []
trainlogdf = pd.DataFrame(columns=["step","trainloss","validloss","acc","f1-score"])
rowindex = 0
eval_no_progress_count = 0
early_stopping = 6
early_stopping_flag = False
for epoch in range(num_epochs):
    model.train()
    allloss = 0
    for step,batch in enumerate(tqdm(train_dataloader)):
        batch.to(device)
        outputs = model(**batch)
        loss = outputs.loss/iter_to_accumlate
        loss.backward()
        allloss += loss.item()
        trainlogdf.loc[rowindex] = [rowindex,loss.item(),None,None,None]
        rowindex += 1
        epochloss.append(loss.item())
        if (step+1)%iter_to_accumlate==0:
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
        if (step+1)%(50*iter_to_accumlate) == 0:
            print("epoch",epoch,"step",step,"loss",loss,sep=" ")
        
        if (step+1)%800 == 0:
            count = 0
            model.eval()
            validloss = 0
            preds = []
            labels = []
            for evalstep,batch in enumerate(tqdm(eval_dataloader)):
                labels += batch['labels'].cpu()
                batch.to(device)
                with torch.no_grad():
                    output = model(**batch)
                validloss += output.loss.item()
                pred = torch.argmax(F.softmax(output.logits.cpu(),dim=1),dim=1)
                preds += pred
                count += int(sum(batch['labels'].cpu() == pred))
            model.train()
            eval_acc = count/2732
            trainlogdf.loc[rowindex-1,"validloss"] = validloss/2732
            trainlogdf.loc[rowindex-1,"acc"] = eval_acc
            trainlogdf.loc[rowindex-1,"f1-score"] = f1_score(np.array(labels),np.array(preds),average="macro")
            print("epoch ",epoch,"step",step,"acc ",eval_acc)
            if eval_acc < max_eval_acc:
                eval_no_progress_count += 1
                if eval_no_progress_count >=early_stopping:
                    print("Early Stopping:Epoch",epoch," Step",step,"Eval_acc",eval_acc,sep=" ")
                    early_stopping_flag = True
                    break
                else:
                    print("Early Stopping record count",eval_no_progress_count,"/",early_stopping,"Will stop","Max eval acc",max_eval_acc,sep=" ")
            if eval_acc > max_eval_acc:
                max_eval_acc = eval_acc
                print("Update Max eval acc",max_eval_acc)
                eval_no_progress_count = 0
                model.save_pretrained("GPT2saved_models")
                torch.save(model.state_dict(),os.path.join("checkpoint","model.bin"))
                torch.save(optimizer.state_dict(),os.path.join("checkpoint","optimizer.bin"))
                torch.save(lr_scheduler.state_dict(),os.path.join("checkpoint","lr_scheduler.bin"))
    print("epoch",epoch,"step",step,"trainLoss:",allloss/(len(train_dataloader)*train_batch_size))
    if early_stopping_flag:
        break
trainlogdf.to_csv("trainlog.csv")
tokenizer.save_pretrained("GPT2saved_models")

  4%|▎         | 200/5464 [01:55<49:07,  1.79it/s]

epoch 0 step 199 loss tensor(3.0066, device='cuda:0', grad_fn=<DivBackward0>)


  7%|▋         | 400/5464 [03:50<49:02,  1.72it/s]

epoch 0 step 399 loss tensor(2.0652, device='cuda:0', grad_fn=<DivBackward0>)


 11%|█         | 600/5464 [05:45<46:49,  1.73it/s]

epoch 0 step 599 loss tensor(1.4153, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 799/5464 [07:38<43:05,  1.80it/s]

epoch 0 step 799 loss tensor(1.1349, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [02:14<00:00, 10.16it/s]


epoch  0 step 799 acc  0.14128843338213762
Update Max eval acc 0.14128843338213762


 18%|█▊        | 1000/5464 [12:01<43:09,  1.72it/s]  

epoch 0 step 999 loss tensor(1.6792, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 1200/5464 [13:54<41:16,  1.72it/s]

epoch 0 step 1199 loss tensor(0.9010, device='cuda:0', grad_fn=<DivBackward0>)


 26%|██▌       | 1400/5464 [15:48<37:49,  1.79it/s]

epoch 0 step 1399 loss tensor(1.1780, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 1599/5464 [17:41<36:27,  1.77it/s]

epoch 0 step 1599 loss tensor(0.7191, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [02:32<00:00,  8.93it/s]


epoch  0 step 1599 acc  0.41654465592972184
Update Max eval acc 0.41654465592972184


 33%|███▎      | 1800/5464 [22:21<35:27,  1.72it/s]   

epoch 0 step 1799 loss tensor(0.4353, device='cuda:0', grad_fn=<DivBackward0>)


 37%|███▋      | 2000/5464 [24:15<33:22,  1.73it/s]

epoch 0 step 1999 loss tensor(0.3188, device='cuda:0', grad_fn=<DivBackward0>)


 40%|████      | 2200/5464 [26:09<31:27,  1.73it/s]

epoch 0 step 2199 loss tensor(0.2660, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 2399/5464 [28:01<24:24,  2.09it/s]

epoch 0 step 2399 loss tensor(0.4338, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [02:16<00:00, 10.03it/s]


epoch  0 step 2399 acc  0.5453879941434846
Update Max eval acc 0.5453879941434846


 48%|████▊     | 2600/5464 [32:24<27:43,  1.72it/s]   

epoch 0 step 2599 loss tensor(0.2277, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 2800/5464 [34:18<25:42,  1.73it/s]

epoch 0 step 2799 loss tensor(0.2742, device='cuda:0', grad_fn=<DivBackward0>)


 55%|█████▍    | 3000/5464 [36:07<10:58,  3.74it/s]

epoch 0 step 2999 loss tensor(0.2320, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 3199/5464 [37:49<21:27,  1.76it/s]

epoch 0 step 3199 loss tensor(0.1638, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [02:32<00:00,  8.94it/s]
 59%|█████▊    | 3200/5464 [40:22<29:11:32, 46.42s/it]

epoch  0 step 3199 acc  0.4864568081991215
Early Stopping record count 1 / 6 Will stop Max eval acc 0.5453879941434846


 62%|██████▏   | 3400/5464 [42:15<19:58,  1.72it/s]   

epoch 0 step 3399 loss tensor(0.1874, device='cuda:0', grad_fn=<DivBackward0>)


 66%|██████▌   | 3600/5464 [44:09<18:00,  1.73it/s]

epoch 0 step 3599 loss tensor(0.1836, device='cuda:0', grad_fn=<DivBackward0>)


 70%|██████▉   | 3800/5464 [46:03<16:01,  1.73it/s]

epoch 0 step 3799 loss tensor(0.1375, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 3999/5464 [47:55<13:31,  1.80it/s]

epoch 0 step 3999 loss tensor(0.1667, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [02:14<00:00, 10.12it/s]


epoch  0 step 3999 acc  0.5629575402635432
Update Max eval acc 0.5629575402635432


 77%|███████▋  | 4200/5464 [52:19<12:10,  1.73it/s]   

epoch 0 step 4199 loss tensor(0.2207, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 4400/5464 [54:13<10:16,  1.73it/s]

epoch 0 step 4399 loss tensor(0.2020, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▍ | 4600/5464 [56:06<08:08,  1.77it/s]

epoch 0 step 4599 loss tensor(0.2160, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 4799/5464 [57:59<06:18,  1.76it/s]

epoch 0 step 4799 loss tensor(0.2210, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [02:32<00:00,  8.94it/s]
 88%|████████▊ | 4800/5464 [1:00:32<8:34:00, 46.45s/it]

epoch  0 step 4799 acc  0.5453879941434846
Early Stopping record count 1 / 6 Will stop Max eval acc 0.5629575402635432


 92%|█████████▏| 5000/5464 [1:02:25<04:28,  1.73it/s]  

epoch 0 step 4999 loss tensor(0.2182, device='cuda:0', grad_fn=<DivBackward0>)


 95%|█████████▌| 5200/5464 [1:04:19<02:31,  1.74it/s]

epoch 0 step 5199 loss tensor(0.1498, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▉| 5400/5464 [1:06:13<00:37,  1.72it/s]

epoch 0 step 5399 loss tensor(0.1276, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 5464/5464 [1:06:49<00:00,  1.36it/s]


epoch 0 step 5463 trainLoss: 0.1330351980335228


  4%|▎         | 200/5464 [01:52<50:21,  1.74it/s]

epoch 1 step 199 loss tensor(0.1778, device='cuda:0', grad_fn=<DivBackward0>)


  7%|▋         | 400/5464 [03:46<48:45,  1.73it/s]

epoch 1 step 399 loss tensor(0.1468, device='cuda:0', grad_fn=<DivBackward0>)


 11%|█         | 600/5464 [05:40<46:47,  1.73it/s]

epoch 1 step 599 loss tensor(0.1306, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 799/5464 [07:33<44:07,  1.76it/s]

epoch 1 step 799 loss tensor(0.1323, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [02:14<00:00, 10.19it/s]


epoch  1 step 799 acc  0.5856515373352855
Update Max eval acc 0.5856515373352855


 18%|█▊        | 1000/5464 [11:55<43:03,  1.73it/s]  

epoch 1 step 999 loss tensor(0.1737, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 1200/5464 [13:49<40:45,  1.74it/s]

epoch 1 step 1199 loss tensor(0.2560, device='cuda:0', grad_fn=<DivBackward0>)


 26%|██▌       | 1400/5464 [15:41<39:21,  1.72it/s]

epoch 1 step 1399 loss tensor(0.1583, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 1599/5464 [17:35<36:35,  1.76it/s]

epoch 1 step 1599 loss tensor(0.2203, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [02:32<00:00,  8.94it/s]
 29%|██▉       | 1600/5464 [20:08<49:50:28, 46.44s/it]

epoch  1 step 1599 acc  0.48279648609077597
Early Stopping record count 1 / 6 Will stop Max eval acc 0.5856515373352855


 33%|███▎      | 1800/5464 [22:01<35:24,  1.72it/s]   

epoch 1 step 1799 loss tensor(0.2121, device='cuda:0', grad_fn=<DivBackward0>)


 37%|███▋      | 2000/5464 [23:55<33:19,  1.73it/s]

epoch 1 step 1999 loss tensor(0.1593, device='cuda:0', grad_fn=<DivBackward0>)


 40%|████      | 2200/5464 [25:48<31:22,  1.73it/s]

epoch 1 step 2199 loss tensor(0.1118, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 2399/5464 [27:42<29:02,  1.76it/s]

epoch 1 step 2399 loss tensor(0.2496, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [02:31<00:00,  9.02it/s]


epoch  1 step 2399 acc  0.5966325036603221
Update Max eval acc 0.5966325036603221


 48%|████▊     | 2600/5464 [32:21<27:28,  1.74it/s]   

epoch 1 step 2599 loss tensor(0.1656, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 2800/5464 [34:15<25:38,  1.73it/s]

epoch 1 step 2799 loss tensor(0.1945, device='cuda:0', grad_fn=<DivBackward0>)


 55%|█████▍    | 3000/5464 [36:07<23:50,  1.72it/s]

epoch 1 step 2999 loss tensor(0.1936, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 3199/5464 [38:00<21:33,  1.75it/s]

epoch 1 step 3199 loss tensor(0.1095, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [02:32<00:00,  8.97it/s]
 59%|█████▊    | 3200/5464 [40:33<29:06:02, 46.27s/it]

epoch  1 step 3199 acc  0.5922401171303074
Early Stopping record count 1 / 6 Will stop Max eval acc 0.5966325036603221


 62%|██████▏   | 3400/5464 [42:26<19:50,  1.73it/s]   

epoch 1 step 3399 loss tensor(0.1897, device='cuda:0', grad_fn=<DivBackward0>)


 66%|██████▌   | 3600/5464 [44:20<17:56,  1.73it/s]

epoch 1 step 3599 loss tensor(0.1531, device='cuda:0', grad_fn=<DivBackward0>)


 70%|██████▉   | 3800/5464 [46:13<16:02,  1.73it/s]

epoch 1 step 3799 loss tensor(0.1806, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 3999/5464 [48:07<13:50,  1.76it/s]

epoch 1 step 3999 loss tensor(0.1312, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:42<00:00, 13.33it/s]


epoch  1 step 3999 acc  0.6017569546120058
Update Max eval acc 0.6017569546120058


 77%|███████▋  | 4200/5464 [50:56<05:34,  3.78it/s]   

epoch 1 step 4199 loss tensor(0.1355, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 4400/5464 [51:49<04:41,  3.78it/s]

epoch 1 step 4399 loss tensor(0.1696, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▍ | 4600/5464 [52:41<03:48,  3.79it/s]

epoch 1 step 4599 loss tensor(0.1589, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 4799/5464 [53:33<02:52,  3.85it/s]

epoch 1 step 4799 loss tensor(0.1476, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.72it/s]
 88%|████████▊ | 4800/5464 [54:39<3:41:50, 20.05s/it]

epoch  1 step 4799 acc  0.5922401171303074
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6017569546120058


 92%|█████████▏| 5000/5464 [55:32<02:02,  3.77it/s]  

epoch 1 step 4999 loss tensor(0.1709, device='cuda:0', grad_fn=<DivBackward0>)


 95%|█████████▌| 5200/5464 [56:24<01:09,  3.78it/s]

epoch 1 step 5199 loss tensor(0.1744, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▉| 5400/5464 [57:16<00:16,  3.78it/s]

epoch 1 step 5399 loss tensor(0.2193, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 5464/5464 [57:33<00:00,  1.58it/s]


epoch 1 step 5463 trainLoss: 0.04340098439006743


  4%|▎         | 200/5464 [00:52<23:13,  3.78it/s]

epoch 2 step 199 loss tensor(0.1831, device='cuda:0', grad_fn=<DivBackward0>)


  7%|▋         | 400/5464 [01:44<22:24,  3.77it/s]

epoch 2 step 399 loss tensor(0.1490, device='cuda:0', grad_fn=<DivBackward0>)


 11%|█         | 600/5464 [02:36<21:28,  3.77it/s]

epoch 2 step 599 loss tensor(0.1622, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 799/5464 [03:28<20:13,  3.84it/s]

epoch 2 step 799 loss tensor(0.1159, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.71it/s]


epoch  2 step 799 acc  0.6112737920937042
Update Max eval acc 0.6112737920937042


 18%|█▊        | 1000/5464 [05:40<19:38,  3.79it/s]  

epoch 2 step 999 loss tensor(0.1733, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 1200/5464 [06:32<18:50,  3.77it/s]

epoch 2 step 1199 loss tensor(0.1517, device='cuda:0', grad_fn=<DivBackward0>)


 26%|██▌       | 1400/5464 [07:25<17:56,  3.78it/s]

epoch 2 step 1399 loss tensor(0.1152, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 1599/5464 [08:17<16:45,  3.84it/s]

epoch 2 step 1599 loss tensor(0.1496, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.69it/s]
 29%|██▉       | 1600/5464 [09:23<21:33:06, 20.08s/it]

epoch  2 step 1599 acc  0.5636896046852123
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6112737920937042


 33%|███▎      | 1800/5464 [10:15<16:12,  3.77it/s]   

epoch 2 step 1799 loss tensor(0.1649, device='cuda:0', grad_fn=<DivBackward0>)


 37%|███▋      | 2000/5464 [11:07<15:15,  3.78it/s]

epoch 2 step 1999 loss tensor(0.1870, device='cuda:0', grad_fn=<DivBackward0>)


 40%|████      | 2200/5464 [12:00<14:23,  3.78it/s]

epoch 2 step 2199 loss tensor(0.2910, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 2399/5464 [12:52<13:20,  3.83it/s]

epoch 2 step 2399 loss tensor(0.2246, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.68it/s]
 44%|████▍     | 2400/5464 [13:58<17:05:37, 20.08s/it]

epoch  2 step 2399 acc  0.5699121522693997
Early Stopping record count 2 / 6 Will stop Max eval acc 0.6112737920937042


 48%|████▊     | 2600/5464 [14:50<12:39,  3.77it/s]   

epoch 2 step 2599 loss tensor(0.1570, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 2800/5464 [15:43<11:45,  3.78it/s]

epoch 2 step 2799 loss tensor(0.2455, device='cuda:0', grad_fn=<DivBackward0>)


 55%|█████▍    | 3000/5464 [16:35<10:53,  3.77it/s]

epoch 2 step 2999 loss tensor(0.1297, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 3199/5464 [17:27<09:51,  3.83it/s]

epoch 2 step 3199 loss tensor(0.1508, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.70it/s]
 59%|█████▊    | 3200/5464 [18:33<12:37:09, 20.07s/it]

epoch  2 step 3199 acc  0.5757686676427526
Early Stopping record count 3 / 6 Will stop Max eval acc 0.6112737920937042


 62%|██████▏   | 3400/5464 [19:25<09:04,  3.79it/s]   

epoch 2 step 3399 loss tensor(0.0977, device='cuda:0', grad_fn=<DivBackward0>)


 66%|██████▌   | 3600/5464 [20:18<08:14,  3.77it/s]

epoch 2 step 3599 loss tensor(0.2012, device='cuda:0', grad_fn=<DivBackward0>)


 70%|██████▉   | 3800/5464 [21:10<07:19,  3.78it/s]

epoch 2 step 3799 loss tensor(0.1495, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 3999/5464 [22:02<06:21,  3.84it/s]

epoch 2 step 3999 loss tensor(0.1176, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.70it/s]


epoch  2 step 3999 acc  0.6350658857979502
Update Max eval acc 0.6350658857979502


 77%|███████▋  | 4200/5464 [24:18<05:33,  3.79it/s]   

epoch 2 step 4199 loss tensor(0.1650, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 4400/5464 [25:11<04:41,  3.77it/s]

epoch 2 step 4399 loss tensor(0.1389, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▍ | 4600/5464 [26:03<03:49,  3.77it/s]

epoch 2 step 4599 loss tensor(0.1831, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 4799/5464 [26:55<02:54,  3.82it/s]

epoch 2 step 4799 loss tensor(0.2046, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.65it/s]
 88%|████████▊ | 4800/5464 [28:01<3:42:48, 20.13s/it]

epoch  2 step 4799 acc  0.6233528550512445
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6350658857979502


 92%|█████████▏| 5000/5464 [28:54<02:03,  3.77it/s]  

epoch 2 step 4999 loss tensor(0.1590, device='cuda:0', grad_fn=<DivBackward0>)


 95%|█████████▌| 5200/5464 [29:46<01:09,  3.77it/s]

epoch 2 step 5199 loss tensor(0.2083, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▉| 5400/5464 [30:39<00:16,  3.78it/s]

epoch 2 step 5399 loss tensor(0.2348, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 5464/5464 [30:55<00:00,  2.94it/s]


epoch 2 step 5463 trainLoss: 0.0415588499002205


  4%|▎         | 200/5464 [00:52<23:11,  3.78it/s]

epoch 3 step 199 loss tensor(0.1267, device='cuda:0', grad_fn=<DivBackward0>)


  7%|▋         | 400/5464 [01:44<22:20,  3.78it/s]

epoch 3 step 399 loss tensor(0.1854, device='cuda:0', grad_fn=<DivBackward0>)


 11%|█         | 600/5464 [02:37<21:34,  3.76it/s]

epoch 3 step 599 loss tensor(0.1736, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 799/5464 [03:29<20:13,  3.84it/s]

epoch 3 step 799 loss tensor(0.1102, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.66it/s]


epoch  3 step 799 acc  0.6475109809663251
Update Max eval acc 0.6475109809663251


 18%|█▊        | 1000/5464 [05:46<19:39,  3.79it/s]  

epoch 3 step 999 loss tensor(0.1496, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 1200/5464 [06:39<18:53,  3.76it/s]

epoch 3 step 1199 loss tensor(0.2413, device='cuda:0', grad_fn=<DivBackward0>)


 26%|██▌       | 1400/5464 [07:31<17:56,  3.78it/s]

epoch 3 step 1399 loss tensor(0.2077, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 1599/5464 [08:23<16:48,  3.83it/s]

epoch 3 step 1599 loss tensor(0.1691, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.70it/s]
 29%|██▉       | 1600/5464 [09:29<21:32:19, 20.07s/it]

epoch  3 step 1599 acc  0.5395314787701317
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6475109809663251


 33%|███▎      | 1800/5464 [10:21<16:11,  3.77it/s]   

epoch 3 step 1799 loss tensor(0.1894, device='cuda:0', grad_fn=<DivBackward0>)


 37%|███▋      | 2000/5464 [11:14<15:17,  3.78it/s]

epoch 3 step 1999 loss tensor(0.1278, device='cuda:0', grad_fn=<DivBackward0>)


 40%|████      | 2200/5464 [12:06<14:25,  3.77it/s]

epoch 3 step 2199 loss tensor(0.1141, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 2399/5464 [12:58<13:18,  3.84it/s]

epoch 3 step 2399 loss tensor(0.1888, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.67it/s]
 44%|████▍     | 2400/5464 [14:04<17:06:12, 20.10s/it]

epoch  3 step 2399 acc  0.6379941434846267
Early Stopping record count 2 / 6 Will stop Max eval acc 0.6475109809663251


 48%|████▊     | 2600/5464 [14:57<12:38,  3.78it/s]   

epoch 3 step 2599 loss tensor(0.1917, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 2800/5464 [15:49<11:41,  3.80it/s]

epoch 3 step 2799 loss tensor(0.1590, device='cuda:0', grad_fn=<DivBackward0>)


 55%|█████▍    | 3000/5464 [16:41<10:50,  3.79it/s]

epoch 3 step 2999 loss tensor(0.1379, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 3199/5464 [17:33<09:52,  3.82it/s]

epoch 3 step 3199 loss tensor(0.2415, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.69it/s]
 59%|█████▊    | 3200/5464 [18:40<12:37:41, 20.08s/it]

epoch  3 step 3199 acc  0.636896046852123
Early Stopping record count 3 / 6 Will stop Max eval acc 0.6475109809663251


 62%|██████▏   | 3400/5464 [19:32<09:09,  3.76it/s]   

epoch 3 step 3399 loss tensor(0.1316, device='cuda:0', grad_fn=<DivBackward0>)


 66%|██████▌   | 3600/5464 [20:24<08:14,  3.77it/s]

epoch 3 step 3599 loss tensor(0.1367, device='cuda:0', grad_fn=<DivBackward0>)


 70%|██████▉   | 3800/5464 [21:17<07:20,  3.77it/s]

epoch 3 step 3799 loss tensor(0.1967, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 3999/5464 [22:09<06:22,  3.83it/s]

epoch 3 step 3999 loss tensor(0.1091, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.69it/s]
 73%|███████▎  | 4000/5464 [23:15<8:09:47, 20.07s/it]

epoch  3 step 3999 acc  0.6383601756954612
Early Stopping record count 4 / 6 Will stop Max eval acc 0.6475109809663251


 77%|███████▋  | 4200/5464 [24:07<05:35,  3.77it/s]  

epoch 3 step 4199 loss tensor(0.2133, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 4400/5464 [25:00<04:41,  3.77it/s]

epoch 3 step 4399 loss tensor(0.2085, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▍ | 4600/5464 [25:52<03:48,  3.77it/s]

epoch 3 step 4599 loss tensor(0.1767, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 4799/5464 [26:44<02:53,  3.84it/s]

epoch 3 step 4799 loss tensor(0.1151, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.69it/s]
 88%|████████▊ | 4800/5464 [27:50<3:42:09, 20.07s/it]

epoch  3 step 4799 acc  0.6405563689604685
Early Stopping record count 5 / 6 Will stop Max eval acc 0.6475109809663251


 92%|█████████▏| 5000/5464 [28:43<02:03,  3.77it/s]  

epoch 3 step 4999 loss tensor(0.1718, device='cuda:0', grad_fn=<DivBackward0>)


 95%|█████████▌| 5200/5464 [29:35<01:09,  3.78it/s]

epoch 3 step 5199 loss tensor(0.1026, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▉| 5400/5464 [30:28<00:16,  3.78it/s]

epoch 3 step 5399 loss tensor(0.0963, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 5464/5464 [30:44<00:00,  2.96it/s]


epoch 3 step 5463 trainLoss: 0.03969364298930621


  4%|▎         | 200/5464 [00:52<23:16,  3.77it/s]

epoch 4 step 199 loss tensor(0.1786, device='cuda:0', grad_fn=<DivBackward0>)


  7%|▋         | 400/5464 [01:44<22:27,  3.76it/s]

epoch 4 step 399 loss tensor(0.1586, device='cuda:0', grad_fn=<DivBackward0>)


 11%|█         | 600/5464 [02:37<21:32,  3.76it/s]

epoch 4 step 599 loss tensor(0.2002, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 799/5464 [03:29<20:22,  3.82it/s]

epoch 4 step 799 loss tensor(0.0816, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.68it/s]


epoch  4 step 799 acc  0.6526354319180088
Update Max eval acc 0.6526354319180088


 18%|█▊        | 1000/5464 [05:49<19:42,  3.77it/s]  

epoch 4 step 999 loss tensor(0.1614, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 1200/5464 [06:41<18:48,  3.78it/s]

epoch 4 step 1199 loss tensor(0.1478, device='cuda:0', grad_fn=<DivBackward0>)


 26%|██▌       | 1400/5464 [07:33<18:02,  3.75it/s]

epoch 4 step 1399 loss tensor(0.1121, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 1599/5464 [08:25<16:49,  3.83it/s]

epoch 4 step 1599 loss tensor(0.0765, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.68it/s]
 29%|██▉       | 1600/5464 [09:32<21:33:53, 20.09s/it]

epoch  4 step 1599 acc  0.6522693997071742
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6526354319180088


 33%|███▎      | 1800/5464 [10:24<16:11,  3.77it/s]   

epoch 4 step 1799 loss tensor(0.0858, device='cuda:0', grad_fn=<DivBackward0>)


 37%|███▋      | 2000/5464 [11:16<15:15,  3.78it/s]

epoch 4 step 1999 loss tensor(0.1046, device='cuda:0', grad_fn=<DivBackward0>)


 40%|████      | 2200/5464 [12:09<14:26,  3.77it/s]

epoch 4 step 2199 loss tensor(0.1334, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 2399/5464 [13:01<13:21,  3.83it/s]

epoch 4 step 2399 loss tensor(0.1421, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.69it/s]


epoch  4 step 2399 acc  0.6614202049780381
Update Max eval acc 0.6614202049780381


 48%|████▊     | 2600/5464 [15:18<12:38,  3.78it/s]   

epoch 4 step 2599 loss tensor(0.1459, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 2800/5464 [16:11<11:44,  3.78it/s]

epoch 4 step 2799 loss tensor(0.1704, device='cuda:0', grad_fn=<DivBackward0>)


 55%|█████▍    | 3000/5464 [17:03<10:51,  3.78it/s]

epoch 4 step 2999 loss tensor(0.1879, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 3199/5464 [17:55<09:49,  3.84it/s]

epoch 4 step 3199 loss tensor(0.0864, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.73it/s]


epoch  4 step 3199 acc  0.664714494875549
Update Max eval acc 0.664714494875549


 62%|██████▏   | 3400/5464 [20:05<09:06,  3.78it/s]   

epoch 4 step 3399 loss tensor(0.0798, device='cuda:0', grad_fn=<DivBackward0>)


 66%|██████▌   | 3600/5464 [20:57<08:13,  3.78it/s]

epoch 4 step 3599 loss tensor(0.1628, device='cuda:0', grad_fn=<DivBackward0>)


 70%|██████▉   | 3800/5464 [21:50<07:20,  3.78it/s]

epoch 4 step 3799 loss tensor(0.1172, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 3999/5464 [22:42<06:22,  3.83it/s]

epoch 4 step 3999 loss tensor(0.1796, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.73it/s]
 73%|███████▎  | 4000/5464 [23:48<8:08:53, 20.04s/it]

epoch  4 step 3999 acc  0.6595900439238653
Early Stopping record count 1 / 6 Will stop Max eval acc 0.664714494875549


 77%|███████▋  | 4200/5464 [24:40<05:35,  3.77it/s]  

epoch 4 step 4199 loss tensor(0.1640, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 4400/5464 [25:32<04:41,  3.78it/s]

epoch 4 step 4399 loss tensor(0.1938, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▍ | 4600/5464 [26:25<03:48,  3.78it/s]

epoch 4 step 4599 loss tensor(0.2130, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 4799/5464 [27:17<02:53,  3.83it/s]

epoch 4 step 4799 loss tensor(0.1974, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.71it/s]
 88%|████████▊ | 4800/5464 [28:23<3:41:56, 20.06s/it]

epoch  4 step 4799 acc  0.6628843338213762
Early Stopping record count 2 / 6 Will stop Max eval acc 0.664714494875549


 92%|█████████▏| 5000/5464 [29:15<02:02,  3.78it/s]  

epoch 4 step 4999 loss tensor(0.1977, device='cuda:0', grad_fn=<DivBackward0>)


 95%|█████████▌| 5200/5464 [30:08<01:09,  3.79it/s]

epoch 4 step 5199 loss tensor(0.1704, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▉| 5400/5464 [31:00<00:16,  3.77it/s]

epoch 4 step 5399 loss tensor(0.1370, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 5464/5464 [31:16<00:00,  2.91it/s]


epoch 4 step 5463 trainLoss: 0.037823917935408055


  4%|▎         | 200/5464 [00:52<23:07,  3.79it/s]

epoch 5 step 199 loss tensor(0.2008, device='cuda:0', grad_fn=<DivBackward0>)


  7%|▋         | 400/5464 [01:44<22:17,  3.78it/s]

epoch 5 step 399 loss tensor(0.0818, device='cuda:0', grad_fn=<DivBackward0>)


 11%|█         | 600/5464 [02:36<21:26,  3.78it/s]

epoch 5 step 599 loss tensor(0.1263, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 799/5464 [03:28<20:18,  3.83it/s]

epoch 5 step 799 loss tensor(0.1747, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.72it/s]
 15%|█▍        | 800/5464 [04:35<25:58:36, 20.05s/it]

epoch  5 step 799 acc  0.6273792093704246
Early Stopping record count 3 / 6 Will stop Max eval acc 0.664714494875549


 18%|█▊        | 1000/5464 [05:27<19:39,  3.78it/s]  

epoch 5 step 999 loss tensor(0.1879, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 1200/5464 [06:19<18:50,  3.77it/s]

epoch 5 step 1199 loss tensor(0.1673, device='cuda:0', grad_fn=<DivBackward0>)


 26%|██▌       | 1400/5464 [07:12<17:58,  3.77it/s]

epoch 5 step 1399 loss tensor(0.1592, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 1599/5464 [08:04<16:47,  3.84it/s]

epoch 5 step 1599 loss tensor(0.1292, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.69it/s]


epoch  5 step 1599 acc  0.6767935578330894
Update Max eval acc 0.6767935578330894


 33%|███▎      | 1800/5464 [10:14<16:06,  3.79it/s]   

epoch 5 step 1799 loss tensor(0.2716, device='cuda:0', grad_fn=<DivBackward0>)


 37%|███▋      | 2000/5464 [11:07<15:20,  3.76it/s]

epoch 5 step 1999 loss tensor(0.0566, device='cuda:0', grad_fn=<DivBackward0>)


 40%|████      | 2200/5464 [11:59<14:21,  3.79it/s]

epoch 5 step 2199 loss tensor(0.1154, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 2399/5464 [12:51<13:21,  3.83it/s]

epoch 5 step 2399 loss tensor(0.1016, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.72it/s]
 44%|████▍     | 2400/5464 [13:57<17:03:43, 20.05s/it]

epoch  5 step 2399 acc  0.6387262079062958
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6767935578330894


 48%|████▊     | 2600/5464 [14:50<12:36,  3.79it/s]   

epoch 5 step 2599 loss tensor(0.1460, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 2800/5464 [15:42<11:47,  3.77it/s]

epoch 5 step 2799 loss tensor(0.1958, device='cuda:0', grad_fn=<DivBackward0>)


 55%|█████▍    | 3000/5464 [16:34<10:57,  3.75it/s]

epoch 5 step 2999 loss tensor(0.1811, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 3199/5464 [17:27<09:50,  3.83it/s]

epoch 5 step 3199 loss tensor(0.0914, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.66it/s]
 59%|█████▊    | 3200/5464 [18:33<12:38:37, 20.10s/it]

epoch  5 step 3199 acc  0.6497071742313324
Early Stopping record count 2 / 6 Will stop Max eval acc 0.6767935578330894


 62%|██████▏   | 3400/5464 [19:25<09:08,  3.76it/s]   

epoch 5 step 3399 loss tensor(0.2051, device='cuda:0', grad_fn=<DivBackward0>)


 66%|██████▌   | 3600/5464 [20:18<08:13,  3.78it/s]

epoch 5 step 3599 loss tensor(0.0969, device='cuda:0', grad_fn=<DivBackward0>)


 70%|██████▉   | 3800/5464 [21:10<07:22,  3.76it/s]

epoch 5 step 3799 loss tensor(0.1307, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 3999/5464 [22:02<06:22,  3.83it/s]

epoch 5 step 3999 loss tensor(0.1319, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.68it/s]
 73%|███████▎  | 4000/5464 [23:09<8:10:09, 20.09s/it]

epoch  5 step 3999 acc  0.6650805270863837
Early Stopping record count 3 / 6 Will stop Max eval acc 0.6767935578330894


 77%|███████▋  | 4200/5464 [24:01<05:35,  3.76it/s]  

epoch 5 step 4199 loss tensor(0.2349, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 4400/5464 [24:54<04:43,  3.75it/s]

epoch 5 step 4399 loss tensor(0.1120, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▍ | 4600/5464 [25:46<03:49,  3.76it/s]

epoch 5 step 4599 loss tensor(0.0680, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 4799/5464 [26:38<02:54,  3.82it/s]

epoch 5 step 4799 loss tensor(0.0327, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.69it/s]


epoch  5 step 4799 acc  0.6789897510980967
Update Max eval acc 0.6789897510980967


 92%|█████████▏| 5000/5464 [28:49<02:02,  3.78it/s]  

epoch 5 step 4999 loss tensor(0.2142, device='cuda:0', grad_fn=<DivBackward0>)


 95%|█████████▌| 5200/5464 [29:42<01:10,  3.77it/s]

epoch 5 step 5199 loss tensor(0.1434, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▉| 5400/5464 [30:34<00:16,  3.78it/s]

epoch 5 step 5399 loss tensor(0.2357, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 5464/5464 [30:51<00:00,  2.95it/s]


epoch 5 step 5463 trainLoss: 0.03577397147903974


  4%|▎         | 200/5464 [00:52<23:15,  3.77it/s]

epoch 6 step 199 loss tensor(0.1102, device='cuda:0', grad_fn=<DivBackward0>)


  7%|▋         | 400/5464 [01:44<22:22,  3.77it/s]

epoch 6 step 399 loss tensor(0.1334, device='cuda:0', grad_fn=<DivBackward0>)


 11%|█         | 600/5464 [02:37<21:32,  3.76it/s]

epoch 6 step 599 loss tensor(0.0887, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 799/5464 [03:29<20:14,  3.84it/s]

epoch 6 step 799 loss tensor(0.2400, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.68it/s]
 15%|█▍        | 800/5464 [04:35<26:01:14, 20.08s/it]

epoch  6 step 799 acc  0.6731332357247438
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6789897510980967


 18%|█▊        | 1000/5464 [05:27<19:42,  3.78it/s]  

epoch 6 step 999 loss tensor(0.1442, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 1200/5464 [06:20<18:52,  3.77it/s]

epoch 6 step 1199 loss tensor(0.0551, device='cuda:0', grad_fn=<DivBackward0>)


 26%|██▌       | 1400/5464 [07:12<17:57,  3.77it/s]

epoch 6 step 1399 loss tensor(0.1671, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 1599/5464 [08:04<16:50,  3.83it/s]

epoch 6 step 1599 loss tensor(0.1963, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.72it/s]
 29%|██▉       | 1600/5464 [09:10<21:31:12, 20.05s/it]

epoch  6 step 1599 acc  0.6537335285505125
Early Stopping record count 2 / 6 Will stop Max eval acc 0.6789897510980967


 33%|███▎      | 1800/5464 [10:02<16:10,  3.77it/s]   

epoch 6 step 1799 loss tensor(0.1488, device='cuda:0', grad_fn=<DivBackward0>)


 37%|███▋      | 2000/5464 [10:55<15:18,  3.77it/s]

epoch 6 step 1999 loss tensor(0.1750, device='cuda:0', grad_fn=<DivBackward0>)


 40%|████      | 2200/5464 [11:47<14:25,  3.77it/s]

epoch 6 step 2199 loss tensor(0.1011, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 2399/5464 [12:39<13:19,  3.83it/s]

epoch 6 step 2399 loss tensor(0.1460, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.73it/s]


epoch  6 step 2399 acc  0.6797218155197657
Update Max eval acc 0.6797218155197657


 48%|████▊     | 2600/5464 [14:50<12:37,  3.78it/s]   

epoch 6 step 2599 loss tensor(0.2423, device='cuda:0', grad_fn=<DivBackward0>)


 51%|█████     | 2800/5464 [15:43<11:45,  3.77it/s]

epoch 6 step 2799 loss tensor(0.2009, device='cuda:0', grad_fn=<DivBackward0>)


 55%|█████▍    | 3000/5464 [16:35<10:51,  3.78it/s]

epoch 6 step 2999 loss tensor(0.1701, device='cuda:0', grad_fn=<DivBackward0>)


 59%|█████▊    | 3199/5464 [17:27<09:52,  3.82it/s]

epoch 6 step 3199 loss tensor(0.1278, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.73it/s]
 59%|█████▊    | 3200/5464 [18:33<12:36:20, 20.04s/it]

epoch  6 step 3199 acc  0.6515373352855052
Early Stopping record count 1 / 6 Will stop Max eval acc 0.6797218155197657


 62%|██████▏   | 3400/5464 [19:26<09:06,  3.78it/s]   

epoch 6 step 3399 loss tensor(0.1675, device='cuda:0', grad_fn=<DivBackward0>)


 66%|██████▌   | 3600/5464 [20:18<08:11,  3.79it/s]

epoch 6 step 3599 loss tensor(0.1549, device='cuda:0', grad_fn=<DivBackward0>)


 70%|██████▉   | 3800/5464 [21:10<07:20,  3.77it/s]

epoch 6 step 3799 loss tensor(0.1754, device='cuda:0', grad_fn=<DivBackward0>)


 73%|███████▎  | 3999/5464 [22:02<06:22,  3.83it/s]

epoch 6 step 3999 loss tensor(0.0874, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.69it/s]
 73%|███████▎  | 4000/5464 [23:09<8:09:52, 20.08s/it]

epoch  6 step 3999 acc  0.6764275256222547
Early Stopping record count 2 / 6 Will stop Max eval acc 0.6797218155197657


 77%|███████▋  | 4200/5464 [24:01<05:34,  3.77it/s]  

epoch 6 step 4199 loss tensor(0.0718, device='cuda:0', grad_fn=<DivBackward0>)


 81%|████████  | 4400/5464 [24:53<04:42,  3.76it/s]

epoch 6 step 4399 loss tensor(0.1086, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▍ | 4600/5464 [25:46<03:49,  3.77it/s]

epoch 6 step 4599 loss tensor(0.1470, device='cuda:0', grad_fn=<DivBackward0>)


 88%|████████▊ | 4799/5464 [26:38<02:53,  3.83it/s]

epoch 6 step 4799 loss tensor(0.1531, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.70it/s]
 88%|████████▊ | 4800/5464 [27:44<3:42:04, 20.07s/it]

epoch  6 step 4799 acc  0.6654465592972182
Early Stopping record count 3 / 6 Will stop Max eval acc 0.6797218155197657


 92%|█████████▏| 5000/5464 [28:37<02:03,  3.77it/s]  

epoch 6 step 4999 loss tensor(0.1364, device='cuda:0', grad_fn=<DivBackward0>)


 95%|█████████▌| 5200/5464 [29:29<01:10,  3.76it/s]

epoch 6 step 5199 loss tensor(0.1106, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▉| 5400/5464 [30:22<00:17,  3.76it/s]

epoch 6 step 5399 loss tensor(0.0911, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 5464/5464 [30:38<00:00,  2.97it/s]


epoch 6 step 5463 trainLoss: 0.03373185391511852


  4%|▎         | 200/5464 [00:52<23:16,  3.77it/s]

epoch 7 step 199 loss tensor(0.1922, device='cuda:0', grad_fn=<DivBackward0>)


  7%|▋         | 400/5464 [01:44<22:20,  3.78it/s]

epoch 7 step 399 loss tensor(0.0534, device='cuda:0', grad_fn=<DivBackward0>)


 11%|█         | 600/5464 [02:37<21:31,  3.77it/s]

epoch 7 step 599 loss tensor(0.0788, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▍        | 799/5464 [03:29<20:20,  3.82it/s]

epoch 7 step 799 loss tensor(0.1439, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:06<00:00, 20.69it/s]
 15%|█▍        | 800/5464 [04:35<26:01:07, 20.08s/it]

epoch  7 step 799 acc  0.6650805270863837
Early Stopping record count 4 / 6 Will stop Max eval acc 0.6797218155197657


 18%|█▊        | 1000/5464 [05:28<19:41,  3.78it/s]  

epoch 7 step 999 loss tensor(0.2886, device='cuda:0', grad_fn=<DivBackward0>)


 22%|██▏       | 1200/5464 [06:20<18:50,  3.77it/s]

epoch 7 step 1199 loss tensor(0.0859, device='cuda:0', grad_fn=<DivBackward0>)


 26%|██▌       | 1400/5464 [07:12<17:58,  3.77it/s]

epoch 7 step 1399 loss tensor(0.1139, device='cuda:0', grad_fn=<DivBackward0>)


 29%|██▉       | 1599/5464 [08:05<16:46,  3.84it/s]

epoch 7 step 1599 loss tensor(0.0942, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.70it/s]
 29%|██▉       | 1600/5464 [09:11<21:32:30, 20.07s/it]

epoch  7 step 1599 acc  0.6592240117130308
Early Stopping record count 5 / 6 Will stop Max eval acc 0.6797218155197657


 33%|███▎      | 1800/5464 [10:03<16:10,  3.78it/s]   

epoch 7 step 1799 loss tensor(0.1113, device='cuda:0', grad_fn=<DivBackward0>)


 37%|███▋      | 2000/5464 [10:56<15:18,  3.77it/s]

epoch 7 step 1999 loss tensor(0.1701, device='cuda:0', grad_fn=<DivBackward0>)


 40%|████      | 2200/5464 [11:48<14:25,  3.77it/s]

epoch 7 step 2199 loss tensor(0.0709, device='cuda:0', grad_fn=<DivBackward0>)


 44%|████▍     | 2399/5464 [12:40<13:19,  3.83it/s]

epoch 7 step 2399 loss tensor(0.0580, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 1366/1366 [01:05<00:00, 20.73it/s]
 44%|████▍     | 2399/5464 [13:46<17:36,  2.90it/s]


epoch  7 step 2399 acc  0.6346998535871157
Early Stopping:Epoch 7  Step 2399 Eval_acc 0.6346998535871157
epoch 7 step 2399 trainLoss: 0.0134672060180099


('GPT2saved_models/tokenizer_config.json',
 'GPT2saved_models/special_tokens_map.json',
 'GPT2saved_models/vocab.json',
 'GPT2saved_models/merges.txt',
 'GPT2saved_models/added_tokens.json')

In [8]:
!source attack.sh

[31m╭─[0m[31m────────────────────[0m[31m [0m[1;31mTraceback [0m[1;2;31m(most recent call last)[0m[31m [0m[31m─────────────────────[0m[31m─╮[0m
[31m│[0m [2;33m/home/ljc/desktop/neutral-attack-for-pretrained-models/GPT2/Defect-detection[0m [31m│[0m
[31m│[0m [2;33m/code/[0m[1;33mattack.py[0m:[94m195[0m in [92m<module>[0m                                              [31m│[0m
[31m│[0m                                                                              [31m│[0m
[31m│[0m   [2m192 [0m                                                                       [31m│[0m
[31m│[0m   [2m193 [0m                                                                       [31m│[0m
[31m│[0m   [2m194 [0m[94mif[0m [91m__name__[0m == [33m'[0m[33m__main__[0m[33m'[0m:                                             [31m│[0m
[31m│[0m [31m❱ [0m195 [2m│   [0mmain()                                                             [31m│[0m
[31m│[0m 