In [3]:
'''
对抗训练
1. adversarial training
2. adversarial fine-tuning
'''
import pandas as pd
import numpy as np
import json
from datasets import Dataset
from torch.utils.data import DataLoader
import torch
from tqdm import tqdm
from sklearn.metrics import f1_score
import os
from torch.functional import F
from transformers import GPT2Tokenizer,GPT2ForSequenceClassification
from transformers import AdamW,get_linear_schedule_with_warmup
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
def adversari_training(model,tokenizer,train_data_path,valid_data_path,train_batch_size,eval_batch_size,num_epochs,lr,early_stopping,outputdir,trainlogdir,adv_training_data_path,training_or_finetuning = "training"):
    '''
    当training_or_finetuning == "fine-tuning"时,进行adversarial fine-tuning
    否则,进行adversarial training
    '''
    def tokenize_function(examples):
        return tokenizer(examples["text"],truncation = True,padding=True) 
    def collate_fn(examples):
        return tokenizer.pad(examples, padding="max_length", return_tensors="pt")
    traindataset = load_dataset("csv",data_files=train_data_path)["train"]
    validdataset = load_dataset("csv",data_files=valid_data_path)["train"]
    if training_or_finetuning == "training":
        advdataset = load_dataset("csv",data_files=adv_training_data_path)["train"]
        for i in advdataset:
            traindataset = traindataset.add_item(i)
            
    elif training_or_finetuning == "fine-tuning":
        advdataset = load_dataset("csv",data_files=adv_training_data_path)["train"]
        advdataset = advdataset.map(tokenize_function,batched=True,remove_columns=["text"]).rename_column("label","labels")
        adv_dataloader = DataLoader(advdataset , collate_fn=collate_fn,batch_size = train_batch_size)
    
    train_tokenized_dataset = traindataset.map(tokenize_function,batched=True,remove_columns=["text"]).rename_column("label","labels")
    valid_tokenized_dataset = validdataset.map(tokenize_function,batched=True,remove_columns=["text"]).rename_column("label","labels")
    train_dataloader = DataLoader(train_tokenized_dataset,shuffle=True,collate_fn=collate_fn,batch_size = train_batch_size)
    eval_dataloader = DataLoader(valid_tokenized_dataset , collate_fn=collate_fn,batch_size = eval_batch_size)
        
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)
    model.resize_token_embeddings(len(tokenizer))
    trainlogdf = pd.DataFrame(columns=["step","trainloss","validloss","acc","f1-score"])
    rowindex = 0
    eval_no_progress_count = 0
    epochloss = []
    max_eval_acc = 0
    optimizer = AdamW(params=model.parameters(), lr=lr)
    iter_to_accumlate = 4
    # Instantiate scheduler

    lr_scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=0.06 * (len(train_dataloader) * num_epochs),
        num_training_steps=(len(train_dataloader) * num_epochs),
    )
    if training_or_finetuning == "fine-tuning":
        train_dataloader = adv_dataloader
        optimizer.load_state_dict(torch.load("checkpoint/optimizer.bin"))
        lr_scheduler.load_state_dict(torch.load("checkpoint/lr_scheduler.bin"))
    for epoch in range(num_epochs):
        model.train()
        allloss = 0
        for step,batch in enumerate(tqdm(train_dataloader)):
            batch.to(device)
            outputs = model(**batch)
            loss = outputs.loss/iter_to_accumlate
            loss.backward()
            allloss += loss.item()
            trainlogdf.loc[rowindex] = [rowindex,loss.item(),None,None,None]
            rowindex += 1
            epochloss.append(loss.item())
            if (step+1)%iter_to_accumlate==0:
                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()
            if (step+1)%(10*iter_to_accumlate) == 0:
                print("epoch",epoch,"step",step,"loss",loss,sep=" ")
        print("epoch",epoch,"step",step,"trainLoss:",allloss/(len(train_dataloader)*train_batch_size))
        
        count = 0
        model.eval()
        validloss = 0
        preds = []
        labels = []
        for evalstep,batch in enumerate(tqdm(eval_dataloader)):
            labels += batch['labels'].cpu()
            batch.to(device)
            with torch.no_grad():
                output = model(**batch)
            validloss += output.loss.item()
            pred = torch.argmax(F.softmax(output.logits.cpu(),dim=1),dim=1)
            preds += pred
            count += int(sum(batch['labels'].cpu() == pred))
        model.train()
        eval_acc = count/132
        trainlogdf.loc[rowindex-1,"validloss"] = validloss/132
        trainlogdf.loc[rowindex-1,"acc"] = eval_acc
        trainlogdf.loc[rowindex-1,"f1-score"] = f1_score(np.array(labels),np.array(preds),average="macro")
        print("epoch ",epoch,"step",step,"acc ",eval_acc)
        if eval_acc < max_eval_acc:
            eval_no_progress_count += 1
            if eval_no_progress_count >=early_stopping:
                print("Early Stopping:Epoch",epoch," Step",step,"Eval_acc",eval_acc,sep=" ")
                break
            else:
                print("Early Stopping record count",eval_no_progress_count,"Max eval acc",max_eval_acc,sep=" ")
        if eval_acc > max_eval_acc: 
            max_eval_acc = eval_acc
            print("Update Max eval acc",max_eval_acc)
            eval_no_progress_count = 0
            model.save_pretrained(outputdir)
            torch.save(model.state_dict(),os.path.join("checkpoint","model.bin"))
            torch.save(optimizer.state_dict(),os.path.join("checkpoint","optimizer.bin"))
            torch.save(lr_scheduler.state_dict(),os.path.join("checkpoint","lr_scheduler.bin"))
        
    trainlogdf.to_csv(trainlogdir)
    tokenizer.save_pretrained(outputdir)



In [3]:
model_path = "../../gpt2/"
model = GPT2ForSequenceClassification.from_pretrained(model_path,num_labels = 66)
tokenizer = GPT2Tokenizer.from_pretrained(model_path,bos_token = "<|startoftext|>",eos_token = "<|endoftext|>",pad_token = "<|pad|>",cls_token = "<|cls|>",sep_token = "<|sep|>" ,model_max_length = 1024)
adversari_training(model,tokenizer,"../dataset/data_folder/processed_gcjpy/train.csv","../dataset/data_folder/processed_gcjpy/valid.csv",2,2,30,5e-5,5,"GPT2-ADV-TRAINING","adv-training.log","../dataset/data_folder/processed_gcjpy/adv_training.csv","training")

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at ../../gpt2/ and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Found cached dataset csv (/home/ljc/.cache/huggingface/datasets/csv/default-b125add5293cdc5e/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
100%|██████████| 1/1 [00:00<00:00, 996.98it/s]
Found cached dataset csv (/home/ljc/.cache/huggingface/datasets/csv/default-ab07e3df6ac105f7/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
100%|██████████| 1/1 [00:00<00:00, 1248.30it/s]
Found cached dataset csv (/home/ljc/.cache/huggingface/datasets/csv/default-f03b36a028be774d/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
100%|██████████| 1/1 [00:00<0

epoch 0 step 39 loss tensor(1.6030, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:13<01:11,  6.28it/s]

epoch 0 step 79 loss tensor(1.2158, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:05,  6.26it/s]

epoch 0 step 119 loss tensor(3.1086, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:26<00:58,  6.25it/s]

epoch 0 step 159 loss tensor(1.2155, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:56,  5.76it/s]

epoch 0 step 199 loss tensor(1.1915, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:39<00:46,  6.23it/s]

epoch 0 step 239 loss tensor(1.3860, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:45<00:39,  6.23it/s]

epoch 0 step 279 loss tensor(4.2932, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.21it/s]

epoch 0 step 319 loss tensor(1.5742, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:58<00:26,  6.24it/s]

epoch 0 step 359 loss tensor(1.0317, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.22it/s]

epoch 0 step 399 loss tensor(1.1473, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:12,  6.86it/s]

epoch 0 step 439 loss tensor(1.1470, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.22it/s]

epoch 0 step 479 loss tensor(0.7629, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.20it/s]

epoch 0 step 519 loss tensor(1.0916, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:25<00:00,  6.21it/s]


epoch 0 step 527 trainLoss: 0.7784213191180518


100%|██████████| 66/66 [00:03<00:00, 18.10it/s]


epoch  0 step 527 acc  0.03787878787878788
Update Max eval acc 0.03787878787878788


  8%|▊         | 40/528 [00:06<01:19,  6.14it/s]

epoch 1 step 39 loss tensor(1.0939, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:13<01:11,  6.23it/s]

epoch 1 step 79 loss tensor(1.0244, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:05,  6.20it/s]

epoch 1 step 119 loss tensor(1.0182, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.20it/s]

epoch 1 step 159 loss tensor(1.1363, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.19it/s]

epoch 1 step 199 loss tensor(1.0590, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:46,  6.19it/s]

epoch 1 step 239 loss tensor(0.9825, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:45<00:39,  6.20it/s]

epoch 1 step 279 loss tensor(1.0313, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.20it/s]

epoch 1 step 319 loss tensor(0.9311, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:58<00:26,  6.21it/s]

epoch 1 step 359 loss tensor(1.0274, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.20it/s]

epoch 1 step 399 loss tensor(1.0833, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:14,  6.18it/s]

epoch 1 step 439 loss tensor(1.0581, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.20it/s]

epoch 1 step 479 loss tensor(0.9885, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.21it/s]

epoch 1 step 519 loss tensor(1.0576, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.22it/s]


epoch 1 step 527 trainLoss: 0.5407884885190111


100%|██████████| 66/66 [00:03<00:00, 18.09it/s]


epoch  1 step 527 acc  0.03787878787878788


  8%|▊         | 41/528 [00:06<01:18,  6.19it/s]

epoch 2 step 39 loss tensor(1.0321, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:12<01:12,  6.21it/s]

epoch 2 step 79 loss tensor(0.9812, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:05,  6.18it/s]

epoch 2 step 119 loss tensor(1.1694, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.20it/s]

epoch 2 step 159 loss tensor(0.9902, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.19it/s]

epoch 2 step 199 loss tensor(1.0612, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:46,  6.22it/s]

epoch 2 step 239 loss tensor(1.2085, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:45<00:39,  6.19it/s]

epoch 2 step 279 loss tensor(1.0049, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.20it/s]

epoch 2 step 319 loss tensor(1.0859, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:58<00:26,  6.20it/s]

epoch 2 step 359 loss tensor(1.1305, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.18it/s]

epoch 2 step 399 loss tensor(0.9676, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:14,  6.20it/s]

epoch 2 step 439 loss tensor(0.8031, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.21it/s]

epoch 2 step 479 loss tensor(1.0222, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.19it/s]

epoch 2 step 519 loss tensor(1.1127, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.22it/s]


epoch 2 step 527 trainLoss: 0.5132024085544276


100%|██████████| 66/66 [00:03<00:00, 18.11it/s]


epoch  2 step 527 acc  0.05303030303030303
Update Max eval acc 0.05303030303030303


  8%|▊         | 41/528 [00:06<01:17,  6.26it/s]

epoch 3 step 39 loss tensor(0.9273, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:12<01:11,  6.24it/s]

epoch 3 step 79 loss tensor(1.1085, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:05,  6.22it/s]

epoch 3 step 119 loss tensor(0.9601, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.21it/s]

epoch 3 step 159 loss tensor(1.1359, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.20it/s]

epoch 3 step 199 loss tensor(1.0851, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:46,  6.19it/s]

epoch 3 step 239 loss tensor(1.0802, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:44<00:39,  6.20it/s]

epoch 3 step 279 loss tensor(1.0471, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.21it/s]

epoch 3 step 319 loss tensor(1.1696, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:57<00:26,  6.21it/s]

epoch 3 step 359 loss tensor(1.0457, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.20it/s]

epoch 3 step 399 loss tensor(1.0853, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:14,  6.21it/s]

epoch 3 step 439 loss tensor(1.1387, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.22it/s]

epoch 3 step 479 loss tensor(1.0794, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.18it/s]

epoch 3 step 519 loss tensor(0.9673, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.24it/s]


epoch 3 step 527 trainLoss: 0.49402363496747886


100%|██████████| 66/66 [00:03<00:00, 18.08it/s]


epoch  3 step 527 acc  0.07575757575757576
Update Max eval acc 0.07575757575757576


  8%|▊         | 41/528 [00:06<01:18,  6.24it/s]

epoch 4 step 39 loss tensor(0.9226, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:12<01:11,  6.25it/s]

epoch 4 step 79 loss tensor(0.8713, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:05,  6.24it/s]

epoch 4 step 119 loss tensor(0.9791, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.21it/s]

epoch 4 step 159 loss tensor(1.0769, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.21it/s]

epoch 4 step 199 loss tensor(0.9904, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:48,  5.92it/s]

epoch 4 step 239 loss tensor(0.9842, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:45<00:39,  6.24it/s]

epoch 4 step 279 loss tensor(0.7608, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.18it/s]

epoch 4 step 319 loss tensor(0.9483, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:57<00:26,  6.21it/s]

epoch 4 step 359 loss tensor(1.0174, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.19it/s]

epoch 4 step 399 loss tensor(0.9753, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:14,  6.19it/s]

epoch 4 step 439 loss tensor(1.0332, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.27it/s]

epoch 4 step 479 loss tensor(0.9203, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.17it/s]

epoch 4 step 519 loss tensor(0.6333, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.23it/s]


epoch 4 step 527 trainLoss: 0.4663372152118069


100%|██████████| 66/66 [00:03<00:00, 17.83it/s]


epoch  4 step 527 acc  0.1893939393939394
Update Max eval acc 0.1893939393939394


  8%|▊         | 41/528 [00:06<01:18,  6.24it/s]

epoch 5 step 39 loss tensor(0.8255, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:12<01:11,  6.22it/s]

epoch 5 step 79 loss tensor(0.6873, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:06,  6.09it/s]

epoch 5 step 119 loss tensor(0.6612, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:58,  6.23it/s]

epoch 5 step 159 loss tensor(0.5052, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.21it/s]

epoch 5 step 199 loss tensor(0.9252, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:46,  6.22it/s]

epoch 5 step 239 loss tensor(0.6510, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:44<00:39,  6.21it/s]

epoch 5 step 279 loss tensor(0.8744, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.20it/s]

epoch 5 step 319 loss tensor(0.9655, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:57<00:27,  6.18it/s]

epoch 5 step 359 loss tensor(0.4840, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.19it/s]

epoch 5 step 399 loss tensor(0.5816, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:14,  6.21it/s]

epoch 5 step 439 loss tensor(0.5864, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.20it/s]

epoch 5 step 479 loss tensor(0.5284, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.21it/s]

epoch 5 step 519 loss tensor(0.8652, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.23it/s]


epoch 5 step 527 trainLoss: 0.37768067255841964


100%|██████████| 66/66 [00:03<00:00, 17.68it/s]


epoch  5 step 527 acc  0.38636363636363635
Update Max eval acc 0.38636363636363635


  8%|▊         | 41/528 [00:06<01:18,  6.23it/s]

epoch 6 step 39 loss tensor(0.8287, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:12<01:11,  6.24it/s]

epoch 6 step 79 loss tensor(0.2897, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:06,  6.15it/s]

epoch 6 step 119 loss tensor(0.4566, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.20it/s]

epoch 6 step 159 loss tensor(0.5397, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.23it/s]

epoch 6 step 199 loss tensor(0.3584, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:46,  6.21it/s]

epoch 6 step 239 loss tensor(0.4886, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:45<00:39,  6.21it/s]

epoch 6 step 279 loss tensor(0.1544, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:34,  6.03it/s]

epoch 6 step 319 loss tensor(0.5130, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:57<00:26,  6.19it/s]

epoch 6 step 359 loss tensor(0.3836, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.20it/s]

epoch 6 step 399 loss tensor(0.1391, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:13,  6.22it/s]

epoch 6 step 439 loss tensor(0.4808, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.21it/s]

epoch 6 step 479 loss tensor(0.3951, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.20it/s]

epoch 6 step 519 loss tensor(0.3825, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.23it/s]


epoch 6 step 527 trainLoss: 0.2188155376387647


100%|██████████| 66/66 [00:03<00:00, 17.84it/s]


epoch  6 step 527 acc  0.6515151515151515
Update Max eval acc 0.6515151515151515


  8%|▊         | 41/528 [00:06<01:17,  6.25it/s]

epoch 7 step 39 loss tensor(0.6758, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:12<01:11,  6.23it/s]

epoch 7 step 79 loss tensor(0.3886, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:05,  6.23it/s]

epoch 7 step 119 loss tensor(0.1691, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.20it/s]

epoch 7 step 159 loss tensor(0.3915, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.22it/s]

epoch 7 step 199 loss tensor(0.1034, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:46,  6.23it/s]

epoch 7 step 239 loss tensor(0.4514, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:45<00:39,  6.20it/s]

epoch 7 step 279 loss tensor(0.0712, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.20it/s]

epoch 7 step 319 loss tensor(0.1929, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:57<00:26,  6.20it/s]

epoch 7 step 359 loss tensor(0.1449, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.21it/s]

epoch 7 step 399 loss tensor(0.1877, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:14,  6.21it/s]

epoch 7 step 439 loss tensor(0.0504, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.20it/s]

epoch 7 step 479 loss tensor(0.1624, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.20it/s]

epoch 7 step 519 loss tensor(0.0906, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.23it/s]


epoch 7 step 527 trainLoss: 0.09650088576635232


100%|██████████| 66/66 [00:03<00:00, 18.10it/s]


epoch  7 step 527 acc  0.7575757575757576
Update Max eval acc 0.7575757575757576


  8%|▊         | 41/528 [00:06<01:18,  6.22it/s]

epoch 8 step 39 loss tensor(0.4175, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:13<01:11,  6.22it/s]

epoch 8 step 79 loss tensor(0.0288, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:05,  6.24it/s]

epoch 8 step 119 loss tensor(0.0359, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:58,  6.22it/s]

epoch 8 step 159 loss tensor(0.0382, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.19it/s]

epoch 8 step 199 loss tensor(0.1044, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:46,  6.16it/s]

epoch 8 step 239 loss tensor(0.0617, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:45<00:39,  6.18it/s]

epoch 8 step 279 loss tensor(0.0437, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.19it/s]

epoch 8 step 319 loss tensor(0.0072, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:57<00:26,  6.20it/s]

epoch 8 step 359 loss tensor(0.0357, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.21it/s]

epoch 8 step 399 loss tensor(0.1168, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:14,  6.18it/s]

epoch 8 step 439 loss tensor(0.1766, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.20it/s]

epoch 8 step 479 loss tensor(0.0386, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.20it/s]

epoch 8 step 519 loss tensor(0.0104, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.22it/s]


epoch 8 step 527 trainLoss: 0.03692346923763222


100%|██████████| 66/66 [00:03<00:00, 18.12it/s]


epoch  8 step 527 acc  0.8257575757575758
Update Max eval acc 0.8257575757575758


  8%|▊         | 41/528 [00:06<01:18,  6.23it/s]

epoch 9 step 39 loss tensor(0.0186, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:12<01:11,  6.24it/s]

epoch 9 step 79 loss tensor(0.0229, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:05,  6.22it/s]

epoch 9 step 119 loss tensor(0.0457, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.20it/s]

epoch 9 step 159 loss tensor(0.0914, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.21it/s]

epoch 9 step 199 loss tensor(0.0329, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:46,  6.23it/s]

epoch 9 step 239 loss tensor(0.0083, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:44<00:39,  6.21it/s]

epoch 9 step 279 loss tensor(0.0377, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.18it/s]

epoch 9 step 319 loss tensor(0.0157, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:57<00:26,  6.19it/s]

epoch 9 step 359 loss tensor(0.0490, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.22it/s]

epoch 9 step 399 loss tensor(0.0079, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:14,  6.09it/s]

epoch 9 step 439 loss tensor(0.0040, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.21it/s]

epoch 9 step 479 loss tensor(0.0092, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.19it/s]

epoch 9 step 519 loss tensor(0.0169, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.23it/s]


epoch 9 step 527 trainLoss: 0.018522971180738586


100%|██████████| 66/66 [00:03<00:00, 18.10it/s]


epoch  9 step 527 acc  0.8181818181818182
Early Stopping record count 1 Max eval acc 0.8257575757575758


  8%|▊         | 41/528 [00:06<01:18,  6.22it/s]

epoch 10 step 39 loss tensor(0.0311, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:13<01:12,  6.20it/s]

epoch 10 step 79 loss tensor(0.0076, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:05,  6.19it/s]

epoch 10 step 119 loss tensor(0.0166, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.19it/s]

epoch 10 step 159 loss tensor(0.0285, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.21it/s]

epoch 10 step 199 loss tensor(0.0056, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:46,  6.21it/s]

epoch 10 step 239 loss tensor(0.0249, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:45<00:39,  6.19it/s]

epoch 10 step 279 loss tensor(0.0125, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:35,  5.79it/s]

epoch 10 step 319 loss tensor(0.0141, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:58<00:26,  6.21it/s]

epoch 10 step 359 loss tensor(0.0068, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.20it/s]

epoch 10 step 399 loss tensor(0.0252, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:14,  6.19it/s]

epoch 10 step 439 loss tensor(0.0061, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.19it/s]

epoch 10 step 479 loss tensor(0.0215, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.19it/s]

epoch 10 step 519 loss tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.22it/s]


epoch 10 step 527 trainLoss: 0.01041984579779003


100%|██████████| 66/66 [00:03<00:00, 17.90it/s]


epoch  10 step 527 acc  0.8484848484848485
Update Max eval acc 0.8484848484848485


  8%|▊         | 41/528 [00:06<01:17,  6.25it/s]

epoch 11 step 39 loss tensor(0.0152, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:12<01:11,  6.26it/s]

epoch 11 step 79 loss tensor(0.0322, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:05,  6.26it/s]

epoch 11 step 119 loss tensor(0.0049, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.22it/s]

epoch 11 step 159 loss tensor(0.0277, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.24it/s]

epoch 11 step 199 loss tensor(0.0062, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:46,  6.21it/s]

epoch 11 step 239 loss tensor(0.0087, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:44<00:39,  6.22it/s]

epoch 11 step 279 loss tensor(0.0077, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.21it/s]

epoch 11 step 319 loss tensor(0.0241, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:57<00:26,  6.21it/s]

epoch 11 step 359 loss tensor(0.0093, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.23it/s]

epoch 11 step 399 loss tensor(0.0273, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:14,  6.20it/s]

epoch 11 step 439 loss tensor(0.0090, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.19it/s]

epoch 11 step 479 loss tensor(0.0137, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.19it/s]

epoch 11 step 519 loss tensor(0.0040, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.23it/s]


epoch 11 step 527 trainLoss: 0.008671225524294918


100%|██████████| 66/66 [00:03<00:00, 18.06it/s]


epoch  11 step 527 acc  0.8787878787878788
Update Max eval acc 0.8787878787878788


  8%|▊         | 41/528 [00:06<01:18,  6.24it/s]

epoch 12 step 39 loss tensor(0.0172, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:12<01:10,  6.34it/s]

epoch 12 step 79 loss tensor(0.0067, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:18<01:02,  6.47it/s]

epoch 12 step 119 loss tensor(0.0204, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:24<00:59,  6.21it/s]

epoch 12 step 159 loss tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:30<00:52,  6.28it/s]

epoch 12 step 199 loss tensor(0.0111, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:37<00:45,  6.30it/s]

epoch 12 step 239 loss tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:43<00:35,  6.92it/s]

epoch 12 step 279 loss tensor(0.0054, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:49<00:30,  6.73it/s]

epoch 12 step 319 loss tensor(0.0104, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:55<00:25,  6.55it/s]

epoch 12 step 359 loss tensor(0.0284, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:02<00:20,  6.22it/s]

epoch 12 step 399 loss tensor(0.0096, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:08<00:13,  6.33it/s]

epoch 12 step 439 loss tensor(0.0022, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:14<00:07,  6.45it/s]

epoch 12 step 479 loss tensor(0.0041, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:20<00:01,  6.22it/s]

epoch 12 step 519 loss tensor(0.0034, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:21<00:00,  6.46it/s]


epoch 12 step 527 trainLoss: 0.005638937502501931


100%|██████████| 66/66 [00:03<00:00, 18.77it/s]


epoch  12 step 527 acc  0.8863636363636364
Update Max eval acc 0.8863636363636364


  8%|▊         | 41/528 [00:06<01:15,  6.49it/s]

epoch 13 step 39 loss tensor(0.0020, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:12<01:11,  6.24it/s]

epoch 13 step 79 loss tensor(0.0063, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:18<01:05,  6.24it/s]

epoch 13 step 119 loss tensor(0.0164, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:24<00:56,  6.44it/s]

epoch 13 step 159 loss tensor(0.0089, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:31<00:50,  6.45it/s]

epoch 13 step 199 loss tensor(0.0056, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:37<00:44,  6.43it/s]

epoch 13 step 239 loss tensor(0.0048, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:43<00:38,  6.45it/s]

epoch 13 step 279 loss tensor(0.0059, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:49<00:33,  6.21it/s]

epoch 13 step 319 loss tensor(0.0092, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:55<00:27,  6.16it/s]

epoch 13 step 359 loss tensor(0.0032, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:02<00:19,  6.38it/s]

epoch 13 step 399 loss tensor(0.0042, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:08<00:13,  6.47it/s]

epoch 13 step 439 loss tensor(0.0122, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:14<00:07,  6.33it/s]

epoch 13 step 479 loss tensor(0.0038, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:20<00:01,  6.19it/s]

epoch 13 step 519 loss tensor(0.0020, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:21<00:00,  6.45it/s]


epoch 13 step 527 trainLoss: 0.0038376058222032348


100%|██████████| 66/66 [00:03<00:00, 19.01it/s]


epoch  13 step 527 acc  0.8787878787878788
Early Stopping record count 1 Max eval acc 0.8863636363636364


  8%|▊         | 41/528 [00:06<01:11,  6.80it/s]

epoch 14 step 39 loss tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:12<01:05,  6.79it/s]

epoch 14 step 79 loss tensor(0.0241, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:18<01:02,  6.56it/s]

epoch 14 step 119 loss tensor(0.0039, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.16it/s]

epoch 14 step 159 loss tensor(0.0055, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:31<00:50,  6.44it/s]

epoch 14 step 199 loss tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:37<00:46,  6.16it/s]

epoch 14 step 239 loss tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:43<00:38,  6.45it/s]

epoch 14 step 279 loss tensor(0.0151, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:49<00:32,  6.28it/s]

epoch 14 step 319 loss tensor(0.0023, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:56<00:27,  6.12it/s]

epoch 14 step 359 loss tensor(0.0082, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:02<00:20,  6.30it/s]

epoch 14 step 399 loss tensor(0.0211, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:08<00:12,  6.86it/s]

epoch 14 step 439 loss tensor(0.0063, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:14<00:06,  6.72it/s]

epoch 14 step 479 loss tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:20<00:01,  6.47it/s]

epoch 14 step 519 loss tensor(0.0105, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:22<00:00,  6.41it/s]


epoch 14 step 527 trainLoss: 0.002948393826514031


100%|██████████| 66/66 [00:06<00:00,  9.86it/s]


epoch  14 step 527 acc  0.8787878787878788
Early Stopping record count 2 Max eval acc 0.8863636363636364


  8%|▊         | 40/528 [00:11<02:16,  3.59it/s]

epoch 15 step 39 loss tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 80/528 [00:22<02:10,  3.43it/s]

epoch 15 step 79 loss tensor(0.0032, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 120/528 [00:33<01:52,  3.62it/s]

epoch 15 step 119 loss tensor(0.0053, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 160/528 [00:44<01:45,  3.49it/s]

epoch 15 step 159 loss tensor(0.0021, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 200/528 [00:55<01:31,  3.57it/s]

epoch 15 step 199 loss tensor(0.0018, device='cuda:0', grad_fn=<DivBackward0>)


 45%|████▌     | 240/528 [01:06<01:21,  3.51it/s]

epoch 15 step 239 loss tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 280/528 [01:17<01:11,  3.46it/s]

epoch 15 step 279 loss tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 320/528 [01:28<00:57,  3.60it/s]

epoch 15 step 319 loss tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 360/528 [01:39<00:48,  3.43it/s]

epoch 15 step 359 loss tensor(0.0038, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 400/528 [01:50<00:35,  3.63it/s]

epoch 15 step 399 loss tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)


 83%|████████▎ | 440/528 [02:01<00:24,  3.55it/s]

epoch 15 step 439 loss tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 480/528 [02:12<00:13,  3.47it/s]

epoch 15 step 479 loss tensor(0.0023, device='cuda:0', grad_fn=<DivBackward0>)


 98%|█████████▊| 520/528 [02:23<00:02,  3.65it/s]

epoch 15 step 519 loss tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [02:25<00:00,  3.63it/s]


epoch 15 step 527 trainLoss: 0.0022236653485930274


100%|██████████| 66/66 [00:06<00:00,  9.97it/s]


epoch  15 step 527 acc  0.8787878787878788
Early Stopping record count 3 Max eval acc 0.8863636363636364


  8%|▊         | 40/528 [00:10<02:12,  3.69it/s]

epoch 16 step 39 loss tensor(0.0042, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 80/528 [00:22<02:09,  3.47it/s]

epoch 16 step 79 loss tensor(0.0016, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:32<01:07,  6.07it/s]

epoch 16 step 119 loss tensor(0.0303, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 160/528 [00:43<01:46,  3.46it/s]

epoch 16 step 159 loss tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 200/528 [00:54<01:31,  3.60it/s]

epoch 16 step 199 loss tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)


 45%|████▌     | 240/528 [01:05<01:22,  3.49it/s]

epoch 16 step 239 loss tensor(0.0033, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 280/528 [01:16<01:09,  3.56it/s]

epoch 16 step 279 loss tensor(0.0057, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 320/528 [01:27<00:58,  3.59it/s]

epoch 16 step 319 loss tensor(0.0017, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 360/528 [01:38<00:48,  3.45it/s]

epoch 16 step 359 loss tensor(0.0072, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 400/528 [01:49<00:35,  3.57it/s]

epoch 16 step 399 loss tensor(0.0048, device='cuda:0', grad_fn=<DivBackward0>)


 83%|████████▎ | 440/528 [02:00<00:25,  3.48it/s]

epoch 16 step 439 loss tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 480/528 [02:11<00:13,  3.66it/s]

epoch 16 step 479 loss tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)


 98%|█████████▊| 520/528 [02:22<00:02,  3.56it/s]

epoch 16 step 519 loss tensor(0.0073, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [02:24<00:00,  3.65it/s]


epoch 16 step 527 trainLoss: 0.002263567858817899


100%|██████████| 66/66 [00:06<00:00, 10.02it/s]


epoch  16 step 527 acc  0.8636363636363636
Early Stopping record count 4 Max eval acc 0.8863636363636364


  8%|▊         | 40/528 [00:11<02:19,  3.51it/s]

epoch 17 step 39 loss tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 80/528 [00:22<02:08,  3.47it/s]

epoch 17 step 79 loss tensor(0.0032, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 120/528 [00:32<01:54,  3.56it/s]

epoch 17 step 119 loss tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 160/528 [00:43<01:46,  3.46it/s]

epoch 17 step 159 loss tensor(0.0018, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 200/528 [00:54<01:30,  3.62it/s]

epoch 17 step 199 loss tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)


 45%|████▌     | 240/528 [01:05<01:22,  3.51it/s]

epoch 17 step 239 loss tensor(0.0022, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 280/528 [01:16<00:52,  4.76it/s]

epoch 17 step 279 loss tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 320/528 [01:27<01:00,  3.43it/s]

epoch 17 step 319 loss tensor(0.0043, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 360/528 [01:38<00:45,  3.69it/s]

epoch 17 step 359 loss tensor(0.0061, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 400/528 [01:49<00:36,  3.48it/s]

epoch 17 step 399 loss tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)


 83%|████████▎ | 440/528 [02:00<00:24,  3.54it/s]

epoch 17 step 439 loss tensor(0.0017, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 480/528 [02:11<00:13,  3.50it/s]

epoch 17 step 479 loss tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)


 98%|█████████▊| 520/528 [02:22<00:02,  3.46it/s]

epoch 17 step 519 loss tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [02:24<00:00,  3.65it/s]


epoch 17 step 527 trainLoss: 0.0017205450058099814


100%|██████████| 66/66 [00:06<00:00,  9.93it/s]


epoch  17 step 527 acc  0.8939393939393939
Update Max eval acc 0.8939393939393939


  8%|▊         | 40/528 [00:10<02:14,  3.62it/s]

epoch 18 step 39 loss tensor(0.0051, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 80/528 [00:22<02:08,  3.49it/s]

epoch 18 step 79 loss tensor(0.0058, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 120/528 [00:33<01:53,  3.61it/s]

epoch 18 step 119 loss tensor(0.0062, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 160/528 [00:44<01:41,  3.62it/s]

epoch 18 step 159 loss tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 200/528 [00:55<01:35,  3.44it/s]

epoch 18 step 199 loss tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)


 45%|████▌     | 240/528 [01:06<01:18,  3.68it/s]

epoch 18 step 239 loss tensor(0.0023, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 280/528 [01:16<00:42,  5.90it/s]

epoch 18 step 279 loss tensor(0.0021, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 320/528 [01:27<00:58,  3.58it/s]

epoch 18 step 319 loss tensor(0.0034, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 360/528 [01:38<00:48,  3.44it/s]

epoch 18 step 359 loss tensor(0.0045, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 400/528 [01:49<00:35,  3.62it/s]

epoch 18 step 399 loss tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)


 83%|████████▎ | 440/528 [02:00<00:25,  3.46it/s]

epoch 18 step 439 loss tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 480/528 [02:11<00:13,  3.54it/s]

epoch 18 step 479 loss tensor(0.0017, device='cuda:0', grad_fn=<DivBackward0>)


 98%|█████████▊| 520/528 [02:22<00:02,  3.64it/s]

epoch 18 step 519 loss tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [02:24<00:00,  3.65it/s]


epoch 18 step 527 trainLoss: 0.0015081927066686067


100%|██████████| 66/66 [00:06<00:00,  9.98it/s]


epoch  18 step 527 acc  0.8787878787878788
Early Stopping record count 1 Max eval acc 0.8939393939393939


  8%|▊         | 40/528 [00:10<02:15,  3.61it/s]

epoch 19 step 39 loss tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 80/528 [00:21<02:10,  3.44it/s]

epoch 19 step 79 loss tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 120/528 [00:32<01:51,  3.65it/s]

epoch 19 step 119 loss tensor(0.0020, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 160/528 [00:43<01:45,  3.49it/s]

epoch 19 step 159 loss tensor(0.0070, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 200/528 [00:54<01:33,  3.50it/s]

epoch 19 step 199 loss tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)


 45%|████▌     | 240/528 [01:05<01:19,  3.63it/s]

epoch 19 step 239 loss tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 280/528 [01:16<01:10,  3.50it/s]

epoch 19 step 279 loss tensor(0.0106, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 320/528 [01:27<01:00,  3.44it/s]

epoch 19 step 319 loss tensor(0.0003, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 360/528 [01:38<00:46,  3.65it/s]

epoch 19 step 359 loss tensor(0.0003, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 400/528 [01:49<00:36,  3.54it/s]

epoch 19 step 399 loss tensor(0.0014, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [02:00<00:15,  5.59it/s]

epoch 19 step 439 loss tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [02:06<00:07,  6.21it/s]

epoch 19 step 479 loss tensor(0.0020, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [02:12<00:01,  6.21it/s]

epoch 19 step 519 loss tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [02:14<00:00,  3.94it/s]


epoch 19 step 527 trainLoss: 0.001427649619348482


100%|██████████| 66/66 [00:03<00:00, 18.10it/s]


epoch  19 step 527 acc  0.8787878787878788
Early Stopping record count 2 Max eval acc 0.8939393939393939


  8%|▊         | 41/528 [00:06<01:18,  6.21it/s]

epoch 20 step 39 loss tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:13<01:11,  6.21it/s]

epoch 20 step 79 loss tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 120/528 [00:19<01:06,  6.11it/s]

epoch 20 step 119 loss tensor(0.0017, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.20it/s]

epoch 20 step 159 loss tensor(0.0042, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.21it/s]

epoch 20 step 199 loss tensor(0.0032, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:46,  6.20it/s]

epoch 20 step 239 loss tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:45<00:39,  6.20it/s]

epoch 20 step 279 loss tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.19it/s]

epoch 20 step 319 loss tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:58<00:26,  6.20it/s]

epoch 20 step 359 loss tensor(0.0018, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.20it/s]

epoch 20 step 399 loss tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:11<00:14,  6.17it/s]

epoch 20 step 439 loss tensor(0.0018, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.19it/s]

epoch 20 step 479 loss tensor(0.0018, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.18it/s]

epoch 20 step 519 loss tensor(0.0046, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:25<00:00,  6.21it/s]


epoch 20 step 527 trainLoss: 0.0011308501071019264


100%|██████████| 66/66 [00:03<00:00, 18.09it/s]


epoch  20 step 527 acc  0.8863636363636364
Early Stopping record count 3 Max eval acc 0.8939393939393939


  8%|▊         | 41/528 [00:06<01:19,  6.12it/s]

epoch 21 step 39 loss tensor(0.0017, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:13<01:12,  6.18it/s]

epoch 21 step 79 loss tensor(0.0016, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:05,  6.20it/s]

epoch 21 step 119 loss tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.19it/s]

epoch 21 step 159 loss tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.21it/s]

epoch 21 step 199 loss tensor(0.0004, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:46,  6.18it/s]

epoch 21 step 239 loss tensor(0.0016, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:45<00:40,  6.17it/s]

epoch 21 step 279 loss tensor(0.0022, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.19it/s]

epoch 21 step 319 loss tensor(0.0038, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:58<00:26,  6.20it/s]

epoch 21 step 359 loss tensor(0.0081, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.20it/s]

epoch 21 step 399 loss tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:14,  6.20it/s]

epoch 21 step 439 loss tensor(0.0005, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  6.20it/s]

epoch 21 step 479 loss tensor(0.0005, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.20it/s]

epoch 21 step 519 loss tensor(0.0004, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.22it/s]


epoch 21 step 527 trainLoss: 0.0011021757227767507


100%|██████████| 66/66 [00:03<00:00, 17.71it/s]


epoch  21 step 527 acc  0.8863636363636364
Early Stopping record count 4 Max eval acc 0.8939393939393939


  8%|▊         | 41/528 [00:06<01:18,  6.20it/s]

epoch 22 step 39 loss tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)


 15%|█▌        | 81/528 [00:13<01:12,  6.18it/s]

epoch 22 step 79 loss tensor(0.0015, device='cuda:0', grad_fn=<DivBackward0>)


 23%|██▎       | 121/528 [00:19<01:05,  6.19it/s]

epoch 22 step 119 loss tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 161/528 [00:25<00:59,  6.22it/s]

epoch 22 step 159 loss tensor(0.0012, device='cuda:0', grad_fn=<DivBackward0>)


 38%|███▊      | 201/528 [00:32<00:52,  6.17it/s]

epoch 22 step 199 loss tensor(0.0003, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 241/528 [00:38<00:44,  6.48it/s]

epoch 22 step 239 loss tensor(0.0018, device='cuda:0', grad_fn=<DivBackward0>)


 53%|█████▎    | 281/528 [00:45<00:39,  6.21it/s]

epoch 22 step 279 loss tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 321/528 [00:51<00:33,  6.20it/s]

epoch 22 step 319 loss tensor(0.0028, device='cuda:0', grad_fn=<DivBackward0>)


 68%|██████▊   | 361/528 [00:57<00:26,  6.21it/s]

epoch 22 step 359 loss tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 401/528 [01:04<00:20,  6.21it/s]

epoch 22 step 399 loss tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)


 84%|████████▎ | 441/528 [01:10<00:13,  6.22it/s]

epoch 22 step 439 loss tensor(0.0014, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 481/528 [01:17<00:07,  5.91it/s]

epoch 22 step 479 loss tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)


 99%|█████████▊| 521/528 [01:23<00:01,  6.20it/s]

epoch 22 step 519 loss tensor(0.0003, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 528/528 [01:24<00:00,  6.22it/s]


epoch 22 step 527 trainLoss: 0.0008856432745315859


100%|██████████| 66/66 [00:03<00:00, 18.13it/s]


epoch  22 step 527 acc  0.8863636363636364
Early Stopping:Epoch 22  Step 527 Eval_acc 0.8863636363636364


In [7]:
model = GPT2ForSequenceClassification.from_pretrained("GPT2saved_models")
tokenizer = GPT2Tokenizer.from_pretrained("GPT2saved_models")
adversari_training(model,tokenizer,"../dataset/data_folder/processed_gcjpy/train.csv","../dataset/data_folder/processed_gcjpy/valid.csv",2,2,30,5e-5,5,"GPT2-ADV-FINE-TUNING","adv-fine-tung.log","../dataset/data_folder/processed_gcjpy/adv_training.csv","fine-tuning")

Found cached dataset csv (/home/ljc/.cache/huggingface/datasets/csv/default-b125add5293cdc5e/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
100%|██████████| 1/1 [00:00<00:00, 1151.33it/s]
Found cached dataset csv (/home/ljc/.cache/huggingface/datasets/csv/default-ab07e3df6ac105f7/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
100%|██████████| 1/1 [00:00<00:00, 1323.12it/s]
Found cached dataset csv (/home/ljc/.cache/huggingface/datasets/csv/default-f03b36a028be774d/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
100%|██████████| 1/1 [00:00<00:00, 1264.87it/s]
 15%|█▌        | 40/264 [00:11<01:01,  3.63it/s]

epoch 0 step 39 loss tensor(2.5713, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 80/264 [00:22<00:51,  3.57it/s]

epoch 0 step 79 loss tensor(1.3749, device='cuda:0', grad_fn=<DivBackward0>)


 45%|████▌     | 120/264 [00:33<00:41,  3.44it/s]

epoch 0 step 119 loss tensor(1.0562, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 160/264 [00:44<00:28,  3.66it/s]

epoch 0 step 159 loss tensor(1.1568, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 200/264 [00:55<00:18,  3.50it/s]

epoch 0 step 199 loss tensor(1.1454, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 240/264 [01:06<00:06,  3.54it/s]

epoch 0 step 239 loss tensor(0.9674, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [01:13<00:00,  3.60it/s]


epoch 0 step 263 trainLoss: 0.550278610578089


100%|██████████| 66/66 [00:06<00:00, 10.07it/s]


epoch  0 step 263 acc  0.22727272727272727
Update Max eval acc 0.22727272727272727


 15%|█▌        | 40/264 [00:11<01:04,  3.49it/s]

epoch 1 step 39 loss tensor(0.7239, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 80/264 [00:22<00:53,  3.47it/s]

epoch 1 step 79 loss tensor(1.0801, device='cuda:0', grad_fn=<DivBackward0>)


 45%|████▌     | 120/264 [00:33<00:39,  3.60it/s]

epoch 1 step 119 loss tensor(0.7432, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 160/264 [00:44<00:29,  3.52it/s]

epoch 1 step 159 loss tensor(0.8755, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 200/264 [00:55<00:18,  3.46it/s]

epoch 1 step 199 loss tensor(0.7291, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 240/264 [01:06<00:06,  3.69it/s]

epoch 1 step 239 loss tensor(0.7755, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [01:12<00:00,  3.66it/s]


epoch 1 step 263 trainLoss: 0.4361478892916983


100%|██████████| 66/66 [00:06<00:00, 10.11it/s]


epoch  1 step 263 acc  0.5378787878787878
Update Max eval acc 0.5378787878787878


 15%|█▌        | 40/264 [00:10<01:02,  3.61it/s]

epoch 2 step 39 loss tensor(0.5759, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 80/264 [00:22<00:53,  3.46it/s]

epoch 2 step 79 loss tensor(1.4324, device='cuda:0', grad_fn=<DivBackward0>)


 45%|████▌     | 120/264 [00:33<00:40,  3.56it/s]

epoch 2 step 119 loss tensor(0.5595, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 160/264 [00:44<00:29,  3.58it/s]

epoch 2 step 159 loss tensor(0.7297, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 200/264 [00:55<00:18,  3.48it/s]

epoch 2 step 199 loss tensor(0.2892, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████ | 240/264 [01:06<00:06,  3.55it/s]

epoch 2 step 239 loss tensor(0.3222, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [01:12<00:00,  3.63it/s]


epoch 2 step 263 trainLoss: 0.30967567584505584


100%|██████████| 66/66 [00:06<00:00, 10.06it/s]


epoch  2 step 263 acc  0.6666666666666666
Update Max eval acc 0.6666666666666666


 15%|█▌        | 40/264 [00:10<01:00,  3.68it/s]

epoch 3 step 39 loss tensor(0.2349, device='cuda:0', grad_fn=<DivBackward0>)


 30%|███       | 80/264 [00:21<00:52,  3.53it/s]

epoch 3 step 79 loss tensor(0.9964, device='cuda:0', grad_fn=<DivBackward0>)


 45%|████▌     | 120/264 [00:32<00:41,  3.44it/s]

epoch 3 step 119 loss tensor(0.2376, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 160/264 [00:43<00:28,  3.69it/s]

epoch 3 step 159 loss tensor(0.3682, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:54<00:11,  5.46it/s]

epoch 3 step 199 loss tensor(0.0837, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [01:00<00:03,  6.19it/s]

epoch 3 step 239 loss tensor(0.1588, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [01:04<00:00,  4.08it/s]


epoch 3 step 263 trainLoss: 0.1688491338123144


100%|██████████| 66/66 [00:03<00:00, 18.15it/s]


epoch  3 step 263 acc  0.7121212121212122
Update Max eval acc 0.7121212121212122


 16%|█▌        | 41/264 [00:06<00:35,  6.25it/s]

epoch 4 step 39 loss tensor(0.2085, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.24it/s]

epoch 4 step 79 loss tensor(0.4378, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:22,  6.22it/s]

epoch 4 step 119 loss tensor(0.1297, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.23it/s]

epoch 4 step 159 loss tensor(0.2973, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.23it/s]

epoch 4 step 199 loss tensor(0.2444, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.74it/s]

epoch 4 step 239 loss tensor(0.0243, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.25it/s]


epoch 4 step 263 trainLoss: 0.0890694415980816


100%|██████████| 66/66 [00:03<00:00, 18.14it/s]


epoch  4 step 263 acc  0.7121212121212122


 16%|█▌        | 41/264 [00:06<00:35,  6.21it/s]

epoch 5 step 39 loss tensor(0.2218, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.21it/s]

epoch 5 step 79 loss tensor(0.3284, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.22it/s]

epoch 5 step 119 loss tensor(0.0376, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:17,  5.99it/s]

epoch 5 step 159 loss tensor(0.2484, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.22it/s]

epoch 5 step 199 loss tensor(0.0461, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.19it/s]

epoch 5 step 239 loss tensor(0.0223, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.22it/s]


epoch 5 step 263 trainLoss: 0.0552695841344592


100%|██████████| 66/66 [00:03<00:00, 18.14it/s]


epoch  5 step 263 acc  0.7878787878787878
Update Max eval acc 0.7878787878787878


 16%|█▌        | 41/264 [00:06<00:35,  6.25it/s]

epoch 6 step 39 loss tensor(0.1721, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.23it/s]

epoch 6 step 79 loss tensor(0.2723, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:22,  6.23it/s]

epoch 6 step 119 loss tensor(0.0623, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.22it/s]

epoch 6 step 159 loss tensor(0.2436, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.22it/s]

epoch 6 step 199 loss tensor(0.0236, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.19it/s]

epoch 6 step 239 loss tensor(0.0839, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.22it/s]


epoch 6 step 263 trainLoss: 0.0371025135924962


100%|██████████| 66/66 [00:03<00:00, 18.16it/s]


epoch  6 step 263 acc  0.8409090909090909
Update Max eval acc 0.8409090909090909


 16%|█▌        | 41/264 [00:06<00:35,  6.25it/s]

epoch 7 step 39 loss tensor(0.1498, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.24it/s]

epoch 7 step 79 loss tensor(0.4657, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.02it/s]

epoch 7 step 119 loss tensor(0.0134, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.22it/s]

epoch 7 step 159 loss tensor(0.0378, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.22it/s]

epoch 7 step 199 loss tensor(0.0089, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.20it/s]

epoch 7 step 239 loss tensor(0.0165, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.25it/s]


epoch 7 step 263 trainLoss: 0.021397891564750655


100%|██████████| 66/66 [00:03<00:00, 18.18it/s]


epoch  7 step 263 acc  0.8409090909090909


 16%|█▌        | 41/264 [00:06<00:35,  6.21it/s]

epoch 8 step 39 loss tensor(0.0199, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:13<00:29,  6.21it/s]

epoch 8 step 79 loss tensor(0.0373, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:22,  6.22it/s]

epoch 8 step 119 loss tensor(0.0117, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.21it/s]

epoch 8 step 159 loss tensor(0.0145, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.22it/s]

epoch 8 step 199 loss tensor(0.0491, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.18it/s]

epoch 8 step 239 loss tensor(0.0127, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.21it/s]


epoch 8 step 263 trainLoss: 0.009576581000036447


100%|██████████| 66/66 [00:03<00:00, 18.14it/s]


epoch  8 step 263 acc  0.8712121212121212
Update Max eval acc 0.8712121212121212


 16%|█▌        | 41/264 [00:06<00:35,  6.24it/s]

epoch 9 step 39 loss tensor(0.0069, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.23it/s]

epoch 9 step 79 loss tensor(0.0546, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:22,  6.22it/s]

epoch 9 step 119 loss tensor(0.0215, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.22it/s]

epoch 9 step 159 loss tensor(0.0236, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.22it/s]

epoch 9 step 199 loss tensor(0.0117, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.19it/s]

epoch 9 step 239 loss tensor(0.0033, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.24it/s]


epoch 9 step 263 trainLoss: 0.006874107718128168


100%|██████████| 66/66 [00:03<00:00, 18.10it/s]


epoch  9 step 263 acc  0.8787878787878788
Update Max eval acc 0.8787878787878788


 16%|█▌        | 41/264 [00:06<00:35,  6.26it/s]

epoch 10 step 39 loss tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.23it/s]

epoch 10 step 79 loss tensor(0.0315, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:22,  6.22it/s]

epoch 10 step 119 loss tensor(0.0028, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.22it/s]

epoch 10 step 159 loss tensor(0.0227, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.23it/s]

epoch 10 step 199 loss tensor(0.0087, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.19it/s]

epoch 10 step 239 loss tensor(0.0154, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.24it/s]


epoch 10 step 263 trainLoss: 0.006490108598055403


100%|██████████| 66/66 [00:03<00:00, 18.13it/s]


epoch  10 step 263 acc  0.8787878787878788


 16%|█▌        | 41/264 [00:06<00:35,  6.22it/s]

epoch 11 step 39 loss tensor(0.0069, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.22it/s]

epoch 11 step 79 loss tensor(0.0242, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:22,  6.22it/s]

epoch 11 step 119 loss tensor(0.0068, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.22it/s]

epoch 11 step 159 loss tensor(0.0221, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.21it/s]

epoch 11 step 199 loss tensor(0.0028, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.19it/s]

epoch 11 step 239 loss tensor(0.0091, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.22it/s]


epoch 11 step 263 trainLoss: 0.004464585976992649


100%|██████████| 66/66 [00:03<00:00, 18.13it/s]


epoch  11 step 263 acc  0.8939393939393939
Update Max eval acc 0.8939393939393939


 16%|█▌        | 41/264 [00:06<00:35,  6.26it/s]

epoch 12 step 39 loss tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.23it/s]

epoch 12 step 79 loss tensor(0.0180, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.22it/s]

epoch 12 step 119 loss tensor(0.0022, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.24it/s]

epoch 12 step 159 loss tensor(0.0083, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.22it/s]

epoch 12 step 199 loss tensor(0.0122, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.20it/s]

epoch 12 step 239 loss tensor(0.0050, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.24it/s]


epoch 12 step 263 trainLoss: 0.003427414632383399


100%|██████████| 66/66 [00:03<00:00, 18.09it/s]


epoch  12 step 263 acc  0.8787878787878788
Early Stopping record count 1 Max eval acc 0.8939393939393939


 16%|█▌        | 41/264 [00:06<00:35,  6.21it/s]

epoch 13 step 39 loss tensor(0.0032, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.20it/s]

epoch 13 step 79 loss tensor(0.0103, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.20it/s]

epoch 13 step 119 loss tensor(0.0050, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.22it/s]

epoch 13 step 159 loss tensor(0.0070, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.22it/s]

epoch 13 step 199 loss tensor(0.0021, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.19it/s]

epoch 13 step 239 loss tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.23it/s]


epoch 13 step 263 trainLoss: 0.003573735207475803


100%|██████████| 66/66 [00:03<00:00, 18.08it/s]


epoch  13 step 263 acc  0.8863636363636364
Early Stopping record count 2 Max eval acc 0.8939393939393939


 16%|█▌        | 41/264 [00:06<00:35,  6.21it/s]

epoch 14 step 39 loss tensor(0.0128, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:13<00:31,  5.76it/s]

epoch 14 step 79 loss tensor(0.0278, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.21it/s]

epoch 14 step 119 loss tensor(0.0071, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.20it/s]

epoch 14 step 159 loss tensor(0.0163, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.20it/s]

epoch 14 step 199 loss tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.18it/s]

epoch 14 step 239 loss tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.21it/s]


epoch 14 step 263 trainLoss: 0.003099523009013589


100%|██████████| 66/66 [00:03<00:00, 18.07it/s]


epoch  14 step 263 acc  0.8939393939393939


 16%|█▌        | 41/264 [00:06<00:35,  6.21it/s]

epoch 15 step 39 loss tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:13<00:29,  6.21it/s]

epoch 15 step 79 loss tensor(0.0207, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.20it/s]

epoch 15 step 119 loss tensor(0.0024, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.20it/s]

epoch 15 step 159 loss tensor(0.0086, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.23it/s]

epoch 15 step 199 loss tensor(0.0039, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.62it/s]

epoch 15 step 239 loss tensor(0.0021, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.21it/s]


epoch 15 step 263 trainLoss: 0.0025585968795150456


100%|██████████| 66/66 [00:03<00:00, 18.10it/s]


epoch  15 step 263 acc  0.8939393939393939


 16%|█▌        | 41/264 [00:06<00:35,  6.21it/s]

epoch 16 step 39 loss tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.21it/s]

epoch 16 step 79 loss tensor(0.0362, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.22it/s]

epoch 16 step 119 loss tensor(0.0023, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.22it/s]

epoch 16 step 159 loss tensor(0.0192, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.22it/s]

epoch 16 step 199 loss tensor(0.0065, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.19it/s]

epoch 16 step 239 loss tensor(0.0033, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.22it/s]


epoch 16 step 263 trainLoss: 0.0025909181405784857


100%|██████████| 66/66 [00:03<00:00, 18.11it/s]


epoch  16 step 263 acc  0.9015151515151515
Update Max eval acc 0.9015151515151515


 16%|█▌        | 41/264 [00:06<00:35,  6.25it/s]

epoch 17 step 39 loss tensor(0.0263, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.25it/s]

epoch 17 step 79 loss tensor(0.0048, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:22,  6.24it/s]

epoch 17 step 119 loss tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.24it/s]

epoch 17 step 159 loss tensor(0.0082, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.22it/s]

epoch 17 step 199 loss tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  5.98it/s]

epoch 17 step 239 loss tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.24it/s]


epoch 17 step 263 trainLoss: 0.002348313480207721


100%|██████████| 66/66 [00:03<00:00, 18.11it/s]


epoch  17 step 263 acc  0.8939393939393939
Early Stopping record count 1 Max eval acc 0.9015151515151515


 16%|█▌        | 41/264 [00:06<00:35,  6.22it/s]

epoch 18 step 39 loss tensor(0.0024, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.21it/s]

epoch 18 step 79 loss tensor(0.0074, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.21it/s]

epoch 18 step 119 loss tensor(0.0022, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.21it/s]

epoch 18 step 159 loss tensor(0.0042, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.22it/s]

epoch 18 step 199 loss tensor(0.0021, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.19it/s]

epoch 18 step 239 loss tensor(0.0015, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.22it/s]


epoch 18 step 263 trainLoss: 0.002254000963865


100%|██████████| 66/66 [00:03<00:00, 18.09it/s]


epoch  18 step 263 acc  0.8939393939393939
Early Stopping record count 2 Max eval acc 0.9015151515151515


 16%|█▌        | 41/264 [00:06<00:35,  6.21it/s]

epoch 19 step 39 loss tensor(0.0048, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:13<00:29,  6.20it/s]

epoch 19 step 79 loss tensor(0.0076, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.20it/s]

epoch 19 step 119 loss tensor(0.0036, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.21it/s]

epoch 19 step 159 loss tensor(0.0116, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:09,  6.84it/s]

epoch 19 step 199 loss tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.19it/s]

epoch 19 step 239 loss tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.23it/s]


epoch 19 step 263 trainLoss: 0.0020449089878445993


100%|██████████| 66/66 [00:03<00:00, 18.16it/s]


epoch  19 step 263 acc  0.9015151515151515


 16%|█▌        | 41/264 [00:06<00:35,  6.21it/s]

epoch 20 step 39 loss tensor(0.0056, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.21it/s]

epoch 20 step 79 loss tensor(0.0097, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.21it/s]

epoch 20 step 119 loss tensor(0.0027, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.20it/s]

epoch 20 step 159 loss tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.21it/s]

epoch 20 step 199 loss tensor(0.0049, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.19it/s]

epoch 20 step 239 loss tensor(0.0034, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.22it/s]


epoch 20 step 263 trainLoss: 0.0017288875648470519


100%|██████████| 66/66 [00:03<00:00, 18.15it/s]


epoch  20 step 263 acc  0.9090909090909091
Update Max eval acc 0.9090909090909091


 16%|█▌        | 41/264 [00:06<00:35,  6.26it/s]

epoch 21 step 39 loss tensor(0.0029, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.25it/s]

epoch 21 step 79 loss tensor(0.0039, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:22,  6.24it/s]

epoch 21 step 119 loss tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.24it/s]

epoch 21 step 159 loss tensor(0.0022, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.22it/s]

epoch 21 step 199 loss tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.20it/s]

epoch 21 step 239 loss tensor(0.0077, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.25it/s]


epoch 21 step 263 trainLoss: 0.0017582161446184866


100%|██████████| 66/66 [00:03<00:00, 18.13it/s]


epoch  21 step 263 acc  0.9015151515151515
Early Stopping record count 1 Max eval acc 0.9090909090909091


 16%|█▌        | 41/264 [00:06<00:35,  6.22it/s]

epoch 22 step 39 loss tensor(0.0020, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.21it/s]

epoch 22 step 79 loss tensor(0.0042, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.21it/s]

epoch 22 step 119 loss tensor(0.0023, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.22it/s]

epoch 22 step 159 loss tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.21it/s]

epoch 22 step 199 loss tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.18it/s]

epoch 22 step 239 loss tensor(0.0119, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.22it/s]


epoch 22 step 263 trainLoss: 0.0016482310001623719


100%|██████████| 66/66 [00:03<00:00, 18.12it/s]


epoch  22 step 263 acc  0.9015151515151515
Early Stopping record count 2 Max eval acc 0.9090909090909091


 16%|█▌        | 41/264 [00:06<00:35,  6.21it/s]

epoch 23 step 39 loss tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.18it/s]

epoch 23 step 79 loss tensor(0.0264, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.21it/s]

epoch 23 step 119 loss tensor(0.0027, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.20it/s]

epoch 23 step 159 loss tensor(0.0032, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.21it/s]

epoch 23 step 199 loss tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.18it/s]

epoch 23 step 239 loss tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.23it/s]


epoch 23 step 263 trainLoss: 0.001500680973218881


100%|██████████| 66/66 [00:03<00:00, 17.70it/s]


epoch  23 step 263 acc  0.9015151515151515
Early Stopping record count 3 Max eval acc 0.9090909090909091


 16%|█▌        | 41/264 [00:06<00:35,  6.20it/s]

epoch 24 step 39 loss tensor(0.0052, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:13<00:29,  6.20it/s]

epoch 24 step 79 loss tensor(0.0057, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.20it/s]

epoch 24 step 119 loss tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:16,  6.20it/s]

epoch 24 step 159 loss tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.21it/s]

epoch 24 step 199 loss tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.19it/s]

epoch 24 step 239 loss tensor(0.0017, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.21it/s]


epoch 24 step 263 trainLoss: 0.0015085305298491608


100%|██████████| 66/66 [00:03<00:00, 18.12it/s]


epoch  24 step 263 acc  0.8939393939393939
Early Stopping record count 4 Max eval acc 0.9090909090909091


 16%|█▌        | 41/264 [00:06<00:35,  6.22it/s]

epoch 25 step 39 loss tensor(0.0012, device='cuda:0', grad_fn=<DivBackward0>)


 31%|███       | 81/264 [00:12<00:29,  6.21it/s]

epoch 25 step 79 loss tensor(0.0160, device='cuda:0', grad_fn=<DivBackward0>)


 46%|████▌     | 121/264 [00:19<00:23,  6.21it/s]

epoch 25 step 119 loss tensor(0.0039, device='cuda:0', grad_fn=<DivBackward0>)


 61%|██████    | 161/264 [00:25<00:17,  5.76it/s]

epoch 25 step 159 loss tensor(0.0016, device='cuda:0', grad_fn=<DivBackward0>)


 76%|███████▌  | 201/264 [00:32<00:10,  6.21it/s]

epoch 25 step 199 loss tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)


 91%|█████████▏| 241/264 [00:38<00:03,  6.19it/s]

epoch 25 step 239 loss tensor(0.0012, device='cuda:0', grad_fn=<DivBackward0>)


100%|██████████| 264/264 [00:42<00:00,  6.23it/s]


epoch 25 step 263 trainLoss: 0.0014154510034572784


100%|██████████| 66/66 [00:03<00:00, 18.14it/s]

epoch  25 step 263 acc  0.9015151515151515
Early Stopping:Epoch 25  Step 263 Eval_acc 0.9015151515151515



