In [1]:
from gpt2.modeling_gpt2 import GPT2LMHeadModel
import torch
from torch.utils.data.dataset import Dataset
from torch.utils.data.dataloader import DataLoader
from transformers import BertTokenizer, get_linear_schedule_with_warmup
from tqdm import tqdm
from datetime import datetime
import os
import numpy as np
import logging
import random
from torch.utils.tensorboard import SummaryWriter

In [2]:
def seed_everything(seed: int = 42):
    """Util to make training reproducible"""
    random.seed(seed)

    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    if os.getenv("CUBLAS_WORKSPACE_CONFIG") is not None:
        torch.use_deterministic_algorithms(True)
        os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def worker_init(worked_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

def set_logger(path):

    logger = logging.getLogger()
    handler = logging.FileHandler(path + "/train_log.txt")
    logger.setLevel(level=logging.INFO)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "%(asctime)s - %(filename)s - %(funcName)s - %(lineno)s - %(levelname)s\n%(message)s",
        "%Y-%m-%d %H:%M:%S",
    )
    handler.setFormatter(formatter)
    console = logging.StreamHandler()
    console.setFormatter(formatter)
    logger.addHandler(handler)
    logger.addHandler(console)


class Chinese_Medical_DS(Dataset):
    def __init__(self, path, tokenizer, max_len=1024):
        self.path = path
        sentence = []
        self.private_positions_list = []
        
        self.total_private_tokens = 0
        self.total_tokens = 0
        self.private_tokens_per_sentence = []
        
        with open(self.path, 'r', encoding='utf-8') as f:
            for line in f:
                sen_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(line.strip()))
                full_sen = []
                
                full_sen.append(tokenizer.convert_tokens_to_ids('[MASK]'))
                full_sen.extend(sen_ids)
                full_sen.append(tokenizer.convert_tokens_to_ids('[CLS]'))
                private_positions = [i for i, token_id in enumerate(full_sen) if tokenizer.decode([token_id]).isdigit()]
                self.private_positions_list.append(private_positions)
                if len(full_sen) <= max_len:
                    sentence.append(full_sen)
                
                num_private_tokens = len(private_positions)
                self.total_private_tokens += num_private_tokens
                self.total_tokens += len(full_sen)
                self.private_tokens_per_sentence.append(num_private_tokens)
                
        self.data = sentence
        
        # 计算平均每句话中的隐私token数
        average_private_tokens_per_sentence = sum(self.private_tokens_per_sentence) / len(self.private_tokens_per_sentence)

        # 计算总的隐私token占总token的比例
        private_token_ratio = self.total_private_tokens / self.total_tokens

        # 计算隐私token个数的均值和方差
        private_token_mean = np.mean(self.private_tokens_per_sentence)
        private_token_variance = np.var(self.private_tokens_per_sentence)

        # 打印统计信息
        print(f"Average private tokens per sentence: {average_private_tokens_per_sentence:.2f}")
        print(f"Private token ratio: {private_token_ratio:.4f}")
        print(f"Private token count mean: {private_token_mean:.2f}")
        print(f"Private token count variance: {private_token_variance:.2f}")
        
    # need to overload
    def __len__(self):
        return len(self.data)

    # need to overload
    def __getitem__(self, idx):
        input = self.data[idx]
        target = input
        private_positions = self.private_positions_list[idx]
        return input, target, private_positions
    
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""

    def __init__(self, save_path, patience=2, verbose=True, delta=0):
        """
        Args:
            save_path : 模型保存文件夹
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.save_path = save_path
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Saves model when validation loss decrease."""
        if self.verbose:
            print(
                f"Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ..."
            )
        model_to_save = model.module if hasattr(model, 'module') else model
        model_to_save.save_pretrained(self.save_path + 'best_model')
        
        # path = os.path.join(self.save_path, "best_network.pth")
        # torch.save(model.state_dict(), path)  # 这里会存储迄今最优模型的参数
        self.val_loss_min = val_loss

In [4]:
seed_everything()
tok_path = '..\\..\\Raw_GPT2\\vocab.txt'
pretrain_model_path = "..\\..\\Raw_GPT2\\"
output_dir = "model\\"

epochs = 50
warmup_steps = 1000
lr = 1e-5
gradient_accumulation = 18
max_grad_norm = 1.0
log_step = 10000
set_logger(output_dir)
logger = logging.getLogger(__name__)


In [5]:
tokenizer = BertTokenizer(vocab_file=tok_path)
model = GPT2LMHeadModel.from_pretrained(pretrain_model_path)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
logger.info('using device:{}'.format(device))
model.train()
model.to(device)
logger.info(model)

GPT2LMHeadModel::init
config =  GPT2Config {
  "_name_or_path": "..\\..\\Raw_GPT2\\",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "gradient_checkpointing": false,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "output_past": true,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 320
    }
  },
  "tokenizer_class": "BertTokenizer",
  "transformers_version": "4.24.0",
  "use_cache

2023-04-01 19:09:57 - 945538542.py - <module> - 4 - INFO
using device:cuda
2023-04-01 19:09:57 - 945538542.py - <module> - 7 - INFO
GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(21128, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c

In [6]:
train_dataset = Chinese_Medical_DS("..\\Data_With_Canary\\tiny_train.txt", tokenizer)
train_dataloader = DataLoader(dataset=train_dataset, worker_init_fn=worker_init)
valid_dataset = Chinese_Medical_DS("..\\Data_With_Canary\\tiny_valid.txt", tokenizer)
valid_dataloader = DataLoader(dataset=valid_dataset, worker_init_fn=worker_init)
logger.info("len(train_dataloader), len(valid_dataloader) = {}, {}".format(len(train_dataloader), len(valid_dataloader)))

Average private tokens per sentence: 1.22
Private token ratio: 0.0055
Private token count mean: 1.22
Private token count variance: 4.91


2023-04-01 19:10:56 - 1768740697.py - <module> - 5 - INFO
len(train_dataloader), len(valid_dataloader) = 9000, 1500


Average private tokens per sentence: 1.22
Private token ratio: 0.0055
Private token count mean: 1.22
Private token count variance: 5.27


In [7]:
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps,
                                                          num_training_steps=len(train_dataloader))
tb_path = output_dir + "/tb"
if not os.path.exists(tb_path):
    os.mkdir(tb_path)
writer = SummaryWriter(tb_path)

In [8]:
train_num_log = len(train_dataloader) * np.log(len(train_dataloader))
delta = 1.0 / train_num_log if 1.0 / train_num_log < 1e-5 else 1e-5
epsilon = 0.5
sigma = 0.810546875
epsilon, delta, sigma

(0.5, 1e-05, 0.810546875)

In [9]:
running_loss = 0
early_stopping = EarlyStopping(output_dir)
train_step_per_epoch = len(train_dataloader)
valid_step_per_epoch = len(valid_dataloader)
for epoch in range(epochs):
    logger.info('epoch {}'.format(epoch + 1))
    now = datetime.now()
    logger.info('time: {}'.format(now))
    model.train()
    train_pbar = tqdm(train_dataloader)
    all_train_loss = 0.0
    train_pbar.set_description('epoch-' + str(epoch + 1))
    
    for step, (input, label, private_positions) in enumerate(train_pbar):
        input_ids = torch.tensor(label).long().to(device)
        label_ids = torch.tensor(input).long().to(device)
        private_positions = torch.tensor(private_positions, dtype=torch.long).to(device)

        #  forward pass
        outputs = model(input_ids=input_ids, labels=label_ids, private_positions=private_positions, sigma=sigma)
        loss, logits = outputs[:2]
        
        if gradient_accumulation > 1:
            loss = loss / gradient_accumulation
            
        #  loss backward
        # if fp16:
        #     with amp.scale_loss(loss, optimizer) as scaled_loss:
        #         scaled_loss.backward()
        #         torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), max_grad_norm)
        # else:
        #     loss.backward()
        #     torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        
        loss.backward()
        loss = loss.detach()
        all_train_loss += loss
        
        writer.add_scalar('loss/train_step_loss', scalar_value=loss * gradient_accumulation, global_step=epoch * train_step_per_epoch+step)
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        #  optimizer step
        if (step + 1) % gradient_accumulation == 0:
            running_loss += loss.item()
            optimizer.step()
            optimizer.zero_grad()
            scheduler.step()
        if (step + 1) % log_step == 0:
            logger.info('now time: {}:{}. Step {} of epoch {}, loss {}'.format(
                datetime.now().hour,
                datetime.now().minute,
                (step + 1) // gradient_accumulation,
                epoch + 1,
                running_loss / log_step))
            running_loss = 0
        
        train_pbar.set_postfix({'loss': '{:.7f}'.format(loss*gradient_accumulation)})
        
    logger.info('train step = {}'.format(step))
    all_train_loss = all_train_loss / (step + 1)

    writer.add_scalar('loss/train_epoch_loss', scalar_value=all_train_loss * gradient_accumulation, global_step=epoch + 1)
    logger.info('saving model for epoch {}'.format(epoch + 1))
    if not os.path.exists(output_dir + 'model_epoch{}'.format(epoch + 1)):
        os.mkdir(output_dir + 'model_epoch{}'.format(epoch + 1))
    model_to_save = model.module if hasattr(model, 'module') else model
    model_to_save.save_pretrained(output_dir + 'model_epoch{}'.format(epoch + 1))

    logger.info('epoch {} finished, train loss = {:.10f}'.format(epoch + 1, all_train_loss * gradient_accumulation))

    then = datetime.now()
    logger.info('time: {}'.format(then))
    logger.info('time for one epoch: {}'.format(then - now))
    
    logger.info('start validate')
    model.eval()
    all_valid_loss = 0.0
    valid_pbar = tqdm(valid_dataloader)
    valid_pbar.set_description('valid ' + str(epoch + 1))
    for step, (input, label, private_positions) in enumerate(valid_pbar):
        input_ids = torch.tensor(label).long().to(device)
        label_ids = torch.tensor(input).long().to(device)
        private_positions = torch.tensor(private_positions, dtype=torch.long).to(device)

        #  forward pass
        outputs = model(input_ids=input_ids, labels=label_ids, private_positions=private_positions, sigma=sigma)
        loss = outputs[0].detach()
        writer.add_scalar('loss/valid_step_loss', scalar_value=loss, global_step=epoch * valid_step_per_epoch + step)
        all_valid_loss += loss
        valid_pbar.set_postfix({'loss': '{:.7f}'.format(loss)})
    
    logger.info('valid step = {}'.format(step))
    all_valid_loss = all_valid_loss / (step + 1)
    writer.add_scalar('loss/valid_epoch_loss', scalar_value=all_valid_loss, global_step=epoch+1)
    logger.info('valid finished, valid loss = {:.10f}'.format(all_valid_loss))
    early_stopping(all_valid_loss, model)
    if early_stopping.early_stop:
        logger.info("Early stopping")
        break

writer.close()    

logger.info('training finished')
if not os.path.exists(output_dir + 'final_model'):
    os.mkdir(output_dir + 'final_model')
model_to_save = model.module if hasattr(model, 'module') else model
model_to_save.save_pretrained(output_dir + 'final_model')

2023-04-01 19:11:33 - 1501083698.py - <module> - 6 - INFO
epoch 1
2023-04-01 19:11:33 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 19:11:33.665602
epoch-1: 100%|██████████| 9000/9000 [08:28<00:00, 17.72it/s, loss=2.8891165]
2023-04-01 19:20:01 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 19:20:01 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 1
2023-04-01 19:20:02 - 1501083698.py - <module> - 70 - INFO
epoch 1 finished, train loss = 2.6338682175
2023-04-01 19:20:02 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 19:20:02.054110
2023-04-01 19:20:02 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:28.388508
2023-04-01 19:20:02 - 1501083698.py - <module> - 76 - INFO
start validate
valid 1: 100%|██████████| 1500/1500 [00:33<00:00, 45.11it/s, loss=2.7356434]
2023-04-01 19:20:35 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 19:20:35 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (inf --> 2.403903).  Saving model ...


2023-04-01 19:20:35 - 1501083698.py - <module> - 6 - INFO
epoch 2
2023-04-01 19:20:35 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 19:20:35.710905
epoch-2: 100%|██████████| 9000/9000 [08:22<00:00, 17.93it/s, loss=2.6652870]
2023-04-01 19:28:57 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 19:28:57 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 2
2023-04-01 19:28:58 - 1501083698.py - <module> - 70 - INFO
epoch 2 finished, train loss = 2.3730046749
2023-04-01 19:28:58 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 19:28:58.130479
2023-04-01 19:28:58 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:22.419574
2023-04-01 19:28:58 - 1501083698.py - <module> - 76 - INFO
start validate
valid 2: 100%|██████████| 1500/1500 [00:31<00:00, 47.31it/s, loss=2.6948478]
2023-04-01 19:29:29 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 19:29:29 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.403903 --> 2.242714).  Saving model ...


2023-04-01 19:29:30 - 1501083698.py - <module> - 6 - INFO
epoch 3
2023-04-01 19:29:30 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 19:29:30.238533
epoch-3: 100%|██████████| 9000/9000 [08:06<00:00, 18.50it/s, loss=2.5444379]
2023-04-01 19:37:36 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 19:37:36 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 3
2023-04-01 19:37:36 - 1501083698.py - <module> - 70 - INFO
epoch 3 finished, train loss = 2.2254943848
2023-04-01 19:37:36 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 19:37:36.996459
2023-04-01 19:37:36 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:06.757926
2023-04-01 19:37:36 - 1501083698.py - <module> - 76 - INFO
start validate
valid 3: 100%|██████████| 1500/1500 [00:31<00:00, 47.85it/s, loss=2.6843152]
2023-04-01 19:38:08 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 19:38:08 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.242714 --> 2.146797).  Saving model ...


2023-04-01 19:38:08 - 1501083698.py - <module> - 6 - INFO
epoch 4
2023-04-01 19:38:08 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 19:38:08.765694
epoch-4: 100%|██████████| 9000/9000 [08:06<00:00, 18.50it/s, loss=2.4734716]
2023-04-01 19:46:15 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 19:46:15 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 4
2023-04-01 19:46:15 - 1501083698.py - <module> - 70 - INFO
epoch 4 finished, train loss = 2.1333711147
2023-04-01 19:46:15 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 19:46:15.686600
2023-04-01 19:46:15 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:06.920906
2023-04-01 19:46:15 - 1501083698.py - <module> - 76 - INFO
start validate
valid 4: 100%|██████████| 1500/1500 [00:31<00:00, 47.57it/s, loss=2.6628971]
2023-04-01 19:46:47 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 19:46:47 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.146797 --> 2.095748).  Saving model ...


2023-04-01 19:46:47 - 1501083698.py - <module> - 6 - INFO
epoch 5
2023-04-01 19:46:47 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 19:46:47.617201
epoch-5: 100%|██████████| 9000/9000 [08:05<00:00, 18.53it/s, loss=2.4102702]
2023-04-01 19:54:53 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 19:54:53 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 5
2023-04-01 19:54:53 - 1501083698.py - <module> - 70 - INFO
epoch 5 finished, train loss = 2.0733387470
2023-04-01 19:54:53 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 19:54:53.743605
2023-04-01 19:54:53 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:06.126404
2023-04-01 19:54:53 - 1501083698.py - <module> - 76 - INFO
start validate
valid 5: 100%|██████████| 1500/1500 [00:31<00:00, 47.40it/s, loss=2.6537900]
2023-04-01 19:55:25 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 19:55:25 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.095748 --> 2.061639).  Saving model ...


2023-04-01 19:55:25 - 1501083698.py - <module> - 6 - INFO
epoch 6
2023-04-01 19:55:25 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 19:55:25.790013
epoch-6: 100%|██████████| 9000/9000 [07:58<00:00, 18.80it/s, loss=2.4510117]
2023-04-01 20:03:24 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 20:03:24 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 6
2023-04-01 20:03:24 - 1501083698.py - <module> - 70 - INFO
epoch 6 finished, train loss = 2.0295300484
2023-04-01 20:03:24 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 20:03:24.955696
2023-04-01 20:03:24 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:07:59.165683
2023-04-01 20:03:24 - 1501083698.py - <module> - 76 - INFO
start validate
valid 6: 100%|██████████| 1500/1500 [00:30<00:00, 49.58it/s, loss=2.6595862]
2023-04-01 20:03:55 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 20:03:55 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.061639 --> 2.038350).  Saving model ...


2023-04-01 20:03:55 - 1501083698.py - <module> - 6 - INFO
epoch 7
2023-04-01 20:03:55 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 20:03:55.611697
epoch-7: 100%|██████████| 9000/9000 [07:55<00:00, 18.95it/s, loss=2.3808010]
2023-04-01 20:11:50 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 20:11:50 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 7
2023-04-01 20:11:51 - 1501083698.py - <module> - 70 - INFO
epoch 7 finished, train loss = 1.9947050810
2023-04-01 20:11:51 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 20:11:51.025694
2023-04-01 20:11:51 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:07:55.413997
2023-04-01 20:11:51 - 1501083698.py - <module> - 76 - INFO
start validate
valid 7: 100%|██████████| 1500/1500 [00:30<00:00, 49.18it/s, loss=2.6488523]
2023-04-01 20:12:21 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 20:12:21 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.038350 --> 2.020854).  Saving model ...


2023-04-01 20:12:21 - 1501083698.py - <module> - 6 - INFO
epoch 8
2023-04-01 20:12:21 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 20:12:21.967695
epoch-8: 100%|██████████| 9000/9000 [07:54<00:00, 18.95it/s, loss=2.3870430]
2023-04-01 20:20:16 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 20:20:16 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 8
2023-04-01 20:20:17 - 1501083698.py - <module> - 70 - INFO
epoch 8 finished, train loss = 1.9670472145
2023-04-01 20:20:17 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 20:20:17.208696
2023-04-01 20:20:17 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:07:55.241001
2023-04-01 20:20:17 - 1501083698.py - <module> - 76 - INFO
start validate
valid 8: 100%|██████████| 1500/1500 [00:30<00:00, 49.42it/s, loss=2.6449416]
2023-04-01 20:20:47 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 20:20:47 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.020854 --> 2.005835).  Saving model ...


2023-04-01 20:20:47 - 1501083698.py - <module> - 6 - INFO
epoch 9
2023-04-01 20:20:47 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 20:20:47.957697
epoch-9: 100%|██████████| 9000/9000 [07:55<00:00, 18.93it/s, loss=2.3835890]
2023-04-01 20:28:43 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 20:28:43 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 9
2023-04-01 20:28:43 - 1501083698.py - <module> - 70 - INFO
epoch 9 finished, train loss = 1.9439495802
2023-04-01 20:28:43 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 20:28:43.688695
2023-04-01 20:28:43 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:07:55.730998
2023-04-01 20:28:43 - 1501083698.py - <module> - 76 - INFO
start validate
valid 9: 100%|██████████| 1500/1500 [00:30<00:00, 49.37it/s, loss=2.6519995]
2023-04-01 20:29:14 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 20:29:14 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.005835 --> 1.996425).  Saving model ...


2023-04-01 20:29:14 - 1501083698.py - <module> - 6 - INFO
epoch 10
2023-04-01 20:29:14 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 20:29:14.495695
epoch-10: 100%|██████████| 9000/9000 [07:55<00:00, 18.94it/s, loss=2.3429942]
2023-04-01 20:37:09 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 20:37:09 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 10
2023-04-01 20:37:10 - 1501083698.py - <module> - 70 - INFO
epoch 10 finished, train loss = 1.9241091013
2023-04-01 20:37:10 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 20:37:10.184695
2023-04-01 20:37:10 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:07:55.689000
2023-04-01 20:37:10 - 1501083698.py - <module> - 76 - INFO
start validate
valid 10: 100%|██████████| 1500/1500 [00:30<00:00, 49.14it/s, loss=2.6512003]
2023-04-01 20:37:40 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 20:37:40 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.996425 --> 1.986995).  Saving model ...


2023-04-01 20:37:41 - 1501083698.py - <module> - 6 - INFO
epoch 11
2023-04-01 20:37:41 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 20:37:41.106696
epoch-11: 100%|██████████| 9000/9000 [07:55<00:00, 18.93it/s, loss=2.3166363]
2023-04-01 20:45:36 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 20:45:36 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 11
2023-04-01 20:45:36 - 1501083698.py - <module> - 70 - INFO
epoch 11 finished, train loss = 1.9068742990
2023-04-01 20:45:36 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 20:45:36.944695
2023-04-01 20:45:36 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:07:55.837999
2023-04-01 20:45:36 - 1501083698.py - <module> - 76 - INFO
start validate
valid 11: 100%|██████████| 1500/1500 [00:30<00:00, 49.38it/s, loss=2.6446867]
2023-04-01 20:46:07 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 20:46:07 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.986995 --> 1.980014).  Saving model ...


2023-04-01 20:46:07 - 1501083698.py - <module> - 6 - INFO
epoch 12
2023-04-01 20:46:07 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 20:46:07.716737
epoch-12: 100%|██████████| 9000/9000 [07:55<00:00, 18.92it/s, loss=2.3014100]
2023-04-01 20:54:03 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 20:54:03 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 12
2023-04-01 20:54:03 - 1501083698.py - <module> - 70 - INFO
epoch 12 finished, train loss = 1.8927189112
2023-04-01 20:54:03 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 20:54:03.765736
2023-04-01 20:54:03 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:07:56.048999
2023-04-01 20:54:03 - 1501083698.py - <module> - 76 - INFO
start validate
valid 12: 100%|██████████| 1500/1500 [00:30<00:00, 49.13it/s, loss=2.6376595]
2023-04-01 20:54:34 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 20:54:34 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.980014 --> 1.973910).  Saving model ...


2023-04-01 20:54:34 - 1501083698.py - <module> - 6 - INFO
epoch 13
2023-04-01 20:54:34 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 20:54:34.707737
epoch-13: 100%|██████████| 9000/9000 [07:56<00:00, 18.89it/s, loss=2.3063149]
2023-04-01 21:02:31 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 21:02:31 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 13
2023-04-01 21:02:31 - 1501083698.py - <module> - 70 - INFO
epoch 13 finished, train loss = 1.8808150291
2023-04-01 21:02:31 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 21:02:31.620736
2023-04-01 21:02:31 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:07:56.912999
2023-04-01 21:02:31 - 1501083698.py - <module> - 76 - INFO
start validate
valid 13: 100%|██████████| 1500/1500 [00:30<00:00, 49.47it/s, loss=2.6389885]
2023-04-01 21:03:01 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 21:03:01 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.973910 --> 1.969586).  Saving model ...


2023-04-01 21:03:02 - 1501083698.py - <module> - 6 - INFO
epoch 14
2023-04-01 21:03:02 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 21:03:02.342736
epoch-14: 100%|██████████| 9000/9000 [07:55<00:00, 18.92it/s, loss=2.2826343]
2023-04-01 21:10:58 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 21:10:58 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 14
2023-04-01 21:10:58 - 1501083698.py - <module> - 70 - INFO
epoch 14 finished, train loss = 1.8708097935
2023-04-01 21:10:58 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 21:10:58.501738
2023-04-01 21:10:58 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:07:56.159002
2023-04-01 21:10:58 - 1501083698.py - <module> - 76 - INFO
start validate
valid 14: 100%|██████████| 1500/1500 [00:30<00:00, 49.14it/s, loss=2.6468775]
2023-04-01 21:11:29 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 21:11:29 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.969586 --> 1.966021).  Saving model ...


2023-04-01 21:11:29 - 1501083698.py - <module> - 6 - INFO
epoch 15
2023-04-01 21:11:29 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 21:11:29.414736
epoch-15: 100%|██████████| 9000/9000 [08:26<00:00, 17.77it/s, loss=2.2829337]
2023-04-01 21:19:55 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 21:19:55 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 15
2023-04-01 21:19:56 - 1501083698.py - <module> - 70 - INFO
epoch 15 finished, train loss = 1.8634200096
2023-04-01 21:19:56 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 21:19:56.201429
2023-04-01 21:19:56 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:26.786693
2023-04-01 21:19:56 - 1501083698.py - <module> - 76 - INFO
start validate
valid 15: 100%|██████████| 1500/1500 [00:32<00:00, 46.71it/s, loss=2.6436141]
2023-04-01 21:20:28 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 21:20:28 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.966021 --> 1.963215).  Saving model ...


2023-04-01 21:20:28 - 1501083698.py - <module> - 6 - INFO
epoch 16
2023-04-01 21:20:28 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 21:20:28.764428
epoch-16: 100%|██████████| 9000/9000 [09:31<00:00, 15.75it/s, loss=2.2885120]
2023-04-01 21:30:00 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 21:30:00 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 16
2023-04-01 21:30:00 - 1501083698.py - <module> - 70 - INFO
epoch 16 finished, train loss = 1.8571434021
2023-04-01 21:30:00 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 21:30:00.663430
2023-04-01 21:30:00 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:09:31.899002
2023-04-01 21:30:00 - 1501083698.py - <module> - 76 - INFO
start validate
valid 16: 100%|██████████| 1500/1500 [00:32<00:00, 46.21it/s, loss=2.6435807]
2023-04-01 21:30:33 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 21:30:33 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.963215 --> 1.961375).  Saving model ...


2023-04-01 21:30:33 - 1501083698.py - <module> - 6 - INFO
epoch 17
2023-04-01 21:30:33 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 21:30:33.533430
epoch-17: 100%|██████████| 9000/9000 [09:51<00:00, 15.21it/s, loss=2.3058000]
2023-04-01 21:40:25 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 21:40:25 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 17
2023-04-01 21:40:25 - 1501083698.py - <module> - 70 - INFO
epoch 17 finished, train loss = 1.8524532318
2023-04-01 21:40:25 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 21:40:25.902180
2023-04-01 21:40:25 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:09:52.368750
2023-04-01 21:40:25 - 1501083698.py - <module> - 76 - INFO
start validate
valid 17: 100%|██████████| 1500/1500 [00:42<00:00, 35.01it/s, loss=2.6549041]
2023-04-01 21:41:08 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 21:41:08 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.961375 --> 1.959769).  Saving model ...


2023-04-01 21:41:09 - 1501083698.py - <module> - 6 - INFO
epoch 18
2023-04-01 21:41:09 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 21:41:09.248804
epoch-18: 100%|██████████| 9000/9000 [10:18<00:00, 14.56it/s, loss=2.2248588]
2023-04-01 21:51:27 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 21:51:27 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 18
2023-04-01 21:51:28 - 1501083698.py - <module> - 70 - INFO
epoch 18 finished, train loss = 1.8498368263
2023-04-01 21:51:28 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 21:51:28.022978
2023-04-01 21:51:28 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:10:18.774174
2023-04-01 21:51:28 - 1501083698.py - <module> - 76 - INFO
start validate
valid 18: 100%|██████████| 1500/1500 [00:42<00:00, 34.99it/s, loss=2.6580944]
2023-04-01 21:52:10 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 21:52:10 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.959769 --> 1.958347).  Saving model ...


2023-04-01 21:52:11 - 1501083698.py - <module> - 6 - INFO
epoch 19
2023-04-01 21:52:11 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 21:52:11.379533
epoch-19: 100%|██████████| 9000/9000 [10:15<00:00, 14.61it/s, loss=2.3143201]
2023-04-01 22:02:27 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 22:02:27 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 19
2023-04-01 22:02:27 - 1501083698.py - <module> - 70 - INFO
epoch 19 finished, train loss = 1.8479902744
2023-04-01 22:02:27 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 22:02:27.638559
2023-04-01 22:02:27 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:10:16.259026
2023-04-01 22:02:27 - 1501083698.py - <module> - 76 - INFO
start validate
valid 19: 100%|██████████| 1500/1500 [00:53<00:00, 28.14it/s, loss=2.6542649]
2023-04-01 22:03:20 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 22:03:20 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

EarlyStopping counter: 1 out of 2


epoch-20: 100%|██████████| 9000/9000 [11:22<00:00, 13.18it/s, loss=2.2997625]
2023-04-01 22:14:43 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 22:14:43 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 20
2023-04-01 22:14:44 - 1501083698.py - <module> - 70 - INFO
epoch 20 finished, train loss = 1.8477326632
2023-04-01 22:14:44 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 22:14:44.053168
2023-04-01 22:14:44 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:11:23.093476
2023-04-01 22:14:44 - 1501083698.py - <module> - 76 - INFO
start validate
valid 20: 100%|██████████| 1500/1500 [00:52<00:00, 28.74it/s, loss=2.6515772]
2023-04-01 22:15:36 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 22:15:36 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 1.9584604502
2023-04-01 22:15:36 - 1501083698.py - <module> - 99 - INFO
Early stopping
2023-04-01 22:15:36 - 1501083698.py - <module> - 104 - INFO
traini

EarlyStopping counter: 2 out of 2
