In [1]:
from gpt2.modeling_gpt2 import GPT2LMHeadModel
import torch
from torch.utils.data.dataset import Dataset
from torch.utils.data.dataloader import DataLoader
from transformers import BertTokenizer, get_linear_schedule_with_warmup
from tqdm import tqdm
from datetime import datetime
import os
import numpy as np
import logging
import random
from torch.utils.tensorboard import SummaryWriter

In [2]:
def seed_everything(seed: int = 42):
    """Util to make training reproducible"""
    random.seed(seed)

    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    if os.getenv("CUBLAS_WORKSPACE_CONFIG") is not None:
        torch.use_deterministic_algorithms(True)
        os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def worker_init(worked_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

def set_logger(path):

    logger = logging.getLogger()
    handler = logging.FileHandler(path + "/train_log.txt")
    logger.setLevel(level=logging.INFO)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "%(asctime)s - %(filename)s - %(funcName)s - %(lineno)s - %(levelname)s\n%(message)s",
        "%Y-%m-%d %H:%M:%S",
    )
    handler.setFormatter(formatter)
    console = logging.StreamHandler()
    console.setFormatter(formatter)
    logger.addHandler(handler)
    logger.addHandler(console)


class Chinese_Medical_DS(Dataset):
    def __init__(self, path, tokenizer, max_len=1024):
        self.path = path
        sentence = []
        self.private_positions_list = []
        
        self.total_private_tokens = 0
        self.total_tokens = 0
        self.private_tokens_per_sentence = []
        
        with open(self.path, 'r', encoding='utf-8') as f:
            for line in f:
                sen_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(line.strip()))
                full_sen = []
                
                full_sen.append(tokenizer.convert_tokens_to_ids('[MASK]'))
                full_sen.extend(sen_ids)
                full_sen.append(tokenizer.convert_tokens_to_ids('[CLS]'))
                private_positions = [i for i, token_id in enumerate(full_sen) if tokenizer.decode([token_id]).isdigit()]
                self.private_positions_list.append(private_positions)
                if len(full_sen) <= max_len:
                    sentence.append(full_sen)
                
                num_private_tokens = len(private_positions)
                self.total_private_tokens += num_private_tokens
                self.total_tokens += len(full_sen)
                self.private_tokens_per_sentence.append(num_private_tokens)
                
        self.data = sentence
        
        # 计算平均每句话中的隐私token数
        average_private_tokens_per_sentence = sum(self.private_tokens_per_sentence) / len(self.private_tokens_per_sentence)

        # 计算总的隐私token占总token的比例
        private_token_ratio = self.total_private_tokens / self.total_tokens

        # 计算隐私token个数的均值和方差
        private_token_mean = np.mean(self.private_tokens_per_sentence)
        private_token_variance = np.var(self.private_tokens_per_sentence)

        # 打印统计信息
        print(f"Average private tokens per sentence: {average_private_tokens_per_sentence:.2f}")
        print(f"Private token ratio: {private_token_ratio:.4f}")
        print(f"Private token count mean: {private_token_mean:.2f}")
        print(f"Private token count variance: {private_token_variance:.2f}")
        
    # need to overload
    def __len__(self):
        return len(self.data)

    # need to overload
    def __getitem__(self, idx):
        input = self.data[idx]
        target = input
        private_positions = self.private_positions_list[idx]
        return input, target, private_positions
    
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""

    def __init__(self, save_path, patience=2, verbose=True, delta=0):
        """
        Args:
            save_path : 模型保存文件夹
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.save_path = save_path
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Saves model when validation loss decrease."""
        if self.verbose:
            print(
                f"Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ..."
            )
        model_to_save = model.module if hasattr(model, 'module') else model
        model_to_save.save_pretrained(self.save_path + 'best_model')
        
        # path = os.path.join(self.save_path, "best_network.pth")
        # torch.save(model.state_dict(), path)  # 这里会存储迄今最优模型的参数
        self.val_loss_min = val_loss

In [3]:
seed_everything()
tok_path = '..\\..\\Raw_GPT2\\vocab.txt'
pretrain_model_path = "..\\..\\Raw_GPT2\\"
output_dir = "model\\"

epochs = 50
warmup_steps = 1000
lr = 1e-5
gradient_accumulation = 18
max_grad_norm = 1.0
log_step = 10000
set_logger(output_dir)
logger = logging.getLogger(__name__)


In [4]:
tokenizer = BertTokenizer(vocab_file=tok_path)
model = GPT2LMHeadModel.from_pretrained(pretrain_model_path)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
logger.info('using device:{}'.format(device))
model.train()
model.to(device)
logger.info(model)

GPT2LMHeadModel::init
config =  GPT2Config {
  "_name_or_path": "..\\..\\Raw_GPT2\\",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "gradient_checkpointing": false,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "output_past": true,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 320
    }
  },
  "tokenizer_class": "BertTokenizer",
  "transformers_version": "4.24.0",
  "use_cache

2023-04-01 14:59:10 - 945538542.py - <module> - 4 - INFO
using device:cuda
2023-04-01 14:59:10 - 945538542.py - <module> - 7 - INFO
GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(21128, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c

In [5]:
train_dataset = Chinese_Medical_DS("..\\..\\Data\\tiny_train.txt", tokenizer)
train_dataloader = DataLoader(dataset=train_dataset, worker_init_fn=worker_init)
valid_dataset = Chinese_Medical_DS("..\\..\\Data\\tiny_valid.txt", tokenizer)
valid_dataloader = DataLoader(dataset=valid_dataset, worker_init_fn=worker_init)
logger.info("len(train_dataloader), len(valid_dataloader) = {}, {}".format(len(train_dataloader), len(valid_dataloader)))

Average private tokens per sentence: 1.22
Private token ratio: 0.0055
Private token count mean: 1.22
Private token count variance: 4.91


2023-04-01 14:59:39 - 3687209076.py - <module> - 5 - INFO
len(train_dataloader), len(valid_dataloader) = 9000, 1500


Average private tokens per sentence: 1.22
Private token ratio: 0.0055
Private token count mean: 1.22
Private token count variance: 5.27


In [6]:
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps,
                                                          num_training_steps=len(train_dataloader))
tb_path = output_dir + "/tb"
if not os.path.exists(tb_path):
    os.mkdir(tb_path)
writer = SummaryWriter(tb_path)

In [7]:
train_num_log = len(train_dataloader) * np.log(len(train_dataloader))
delta = 1.0 / train_num_log if 1.0 / train_num_log < 1e-5 else 1e-5
epsilon = 0.5
sigma = 0.810546875
epsilon, delta, sigma

(0.5, 1e-05, 0.810546875)

In [8]:
running_loss = 0
early_stopping = EarlyStopping(output_dir)
train_step_per_epoch = len(train_dataloader)
valid_step_per_epoch = len(valid_dataloader)
for epoch in range(epochs):
    logger.info('epoch {}'.format(epoch + 1))
    now = datetime.now()
    logger.info('time: {}'.format(now))
    model.train()
    train_pbar = tqdm(train_dataloader)
    all_train_loss = 0.0
    train_pbar.set_description('epoch-' + str(epoch + 1))
    
    for step, (input, label, private_positions) in enumerate(train_pbar):
        input_ids = torch.tensor(label).long().to(device)
        label_ids = torch.tensor(input).long().to(device)
        private_positions = torch.tensor(private_positions, dtype=torch.long).to(device)

        #  forward pass
        outputs = model(input_ids=input_ids, labels=label_ids, private_positions=private_positions, sigma=sigma)
        loss, logits = outputs[:2]
        
        if gradient_accumulation > 1:
            loss = loss / gradient_accumulation
            
        #  loss backward
        # if fp16:
        #     with amp.scale_loss(loss, optimizer) as scaled_loss:
        #         scaled_loss.backward()
        #         torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), max_grad_norm)
        # else:
        #     loss.backward()
        #     torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        
        loss.backward()
        loss = loss.detach()
        all_train_loss += loss
        
        writer.add_scalar('loss/train_step_loss', scalar_value=loss * gradient_accumulation, global_step=epoch * train_step_per_epoch+step)
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        #  optimizer step
        if (step + 1) % gradient_accumulation == 0:
            running_loss += loss.item()
            optimizer.step()
            optimizer.zero_grad()
            scheduler.step()
        if (step + 1) % log_step == 0:
            logger.info('now time: {}:{}. Step {} of epoch {}, loss {}'.format(
                datetime.now().hour,
                datetime.now().minute,
                (step + 1) // gradient_accumulation,
                epoch + 1,
                running_loss / log_step))
            running_loss = 0
        
        train_pbar.set_postfix({'loss': '{:.7f}'.format(loss*gradient_accumulation)})
        
    logger.info('train step = {}'.format(step))
    all_train_loss = all_train_loss / (step + 1)

    writer.add_scalar('loss/train_epoch_loss', scalar_value=all_train_loss * gradient_accumulation, global_step=epoch + 1)
    logger.info('saving model for epoch {}'.format(epoch + 1))
    if not os.path.exists(output_dir + 'model_epoch{}'.format(epoch + 1)):
        os.mkdir(output_dir + 'model_epoch{}'.format(epoch + 1))
    model_to_save = model.module if hasattr(model, 'module') else model
    model_to_save.save_pretrained(output_dir + 'model_epoch{}'.format(epoch + 1))

    logger.info('epoch {} finished, train loss = {:.10f}'.format(epoch + 1, all_train_loss * gradient_accumulation))

    then = datetime.now()
    logger.info('time: {}'.format(then))
    logger.info('time for one epoch: {}'.format(then - now))
    
    logger.info('start validate')
    model.eval()
    all_valid_loss = 0.0
    valid_pbar = tqdm(valid_dataloader)
    valid_pbar.set_description('valid ' + str(epoch + 1))
    for step, (input, label, private_positions) in enumerate(valid_pbar):
        input_ids = torch.tensor(label).long().to(device)
        label_ids = torch.tensor(input).long().to(device)
        private_positions = torch.tensor(private_positions, dtype=torch.long).to(device)

        #  forward pass
        outputs = model(input_ids=input_ids, labels=label_ids, private_positions=private_positions, sigma=sigma)
        loss = outputs[0].detach()
        writer.add_scalar('loss/valid_step_loss', scalar_value=loss, global_step=epoch * valid_step_per_epoch + step)
        all_valid_loss += loss
        valid_pbar.set_postfix({'loss': '{:.7f}'.format(loss)})
    
    logger.info('valid step = {}'.format(step))
    all_valid_loss = all_valid_loss / (step + 1)
    writer.add_scalar('loss/valid_epoch_loss', scalar_value=all_valid_loss, global_step=epoch+1)
    logger.info('valid finished, valid loss = {:.10f}'.format(all_valid_loss))
    early_stopping(all_valid_loss, model)
    if early_stopping.early_stop:
        logger.info("Early stopping")
        break

writer.close()    

logger.info('training finished')
if not os.path.exists(output_dir + 'final_model'):
    os.mkdir(output_dir + 'final_model')
model_to_save = model.module if hasattr(model, 'module') else model
model_to_save.save_pretrained(output_dir + 'final_model')

2023-04-01 14:59:39 - 1501083698.py - <module> - 6 - INFO
epoch 1
2023-04-01 14:59:39 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 14:59:39.306014
epoch-1: 100%|██████████| 9000/9000 [08:35<00:00, 17.44it/s, loss=2.8596079]
2023-04-01 15:08:15 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 15:08:15 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 1
2023-04-01 15:08:15 - 1501083698.py - <module> - 70 - INFO
epoch 1 finished, train loss = 2.6342368126
2023-04-01 15:08:15 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 15:08:15.651124
2023-04-01 15:08:15 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:36.345110
2023-04-01 15:08:15 - 1501083698.py - <module> - 76 - INFO
start validate
valid 1: 100%|██████████| 1500/1500 [00:30<00:00, 48.64it/s, loss=2.7352777]
2023-04-01 15:08:46 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 15:08:46 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (inf --> 2.403984).  Saving model ...


2023-04-01 15:08:46 - 1501083698.py - <module> - 6 - INFO
epoch 2
2023-04-01 15:08:46 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 15:08:46.871122
epoch-2: 100%|██████████| 9000/9000 [08:12<00:00, 18.27it/s, loss=2.6344121]
2023-04-01 15:16:59 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 15:16:59 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 2
2023-04-01 15:16:59 - 1501083698.py - <module> - 70 - INFO
epoch 2 finished, train loss = 2.3726568222
2023-04-01 15:16:59 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 15:16:59.762772
2023-04-01 15:16:59 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:12.891650
2023-04-01 15:16:59 - 1501083698.py - <module> - 76 - INFO
start validate
valid 2: 100%|██████████| 1500/1500 [00:32<00:00, 45.91it/s, loss=2.7022147]
2023-04-01 15:17:32 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 15:17:32 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.403984 --> 2.243861).  Saving model ...


2023-04-01 15:17:32 - 1501083698.py - <module> - 6 - INFO
epoch 3
2023-04-01 15:17:32 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 15:17:32.864411
epoch-3: 100%|██████████| 9000/9000 [08:07<00:00, 18.44it/s, loss=2.5887065]
2023-04-01 15:25:40 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 15:25:40 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 3
2023-04-01 15:25:41 - 1501083698.py - <module> - 70 - INFO
epoch 3 finished, train loss = 2.2246961594
2023-04-01 15:25:41 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 15:25:41.220413
2023-04-01 15:25:41 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:08.356002
2023-04-01 15:25:41 - 1501083698.py - <module> - 76 - INFO
start validate
valid 3: 100%|██████████| 1500/1500 [00:30<00:00, 48.75it/s, loss=2.6823409]
2023-04-01 15:26:11 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 15:26:11 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.243861 --> 2.146838).  Saving model ...


2023-04-01 15:26:12 - 1501083698.py - <module> - 6 - INFO
epoch 4
2023-04-01 15:26:12 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 15:26:12.415411
epoch-4: 100%|██████████| 9000/9000 [08:01<00:00, 18.69it/s, loss=2.5263071]
2023-04-01 15:34:13 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 15:34:13 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 4
2023-04-01 15:34:14 - 1501083698.py - <module> - 70 - INFO
epoch 4 finished, train loss = 2.1333994865
2023-04-01 15:34:14 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 15:34:14.358410
2023-04-01 15:34:14 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:01.942999
2023-04-01 15:34:14 - 1501083698.py - <module> - 76 - INFO
start validate
valid 4: 100%|██████████| 1500/1500 [00:30<00:00, 48.62it/s, loss=2.6741590]
2023-04-01 15:34:45 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 15:34:45 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.146838 --> 2.095515).  Saving model ...


2023-04-01 15:34:45 - 1501083698.py - <module> - 6 - INFO
epoch 5
2023-04-01 15:34:45 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 15:34:45.609410
epoch-5: 100%|██████████| 9000/9000 [08:02<00:00, 18.67it/s, loss=2.4577608]
2023-04-01 15:42:47 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 15:42:47 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 5
2023-04-01 15:42:48 - 1501083698.py - <module> - 70 - INFO
epoch 5 finished, train loss = 2.0733227730
2023-04-01 15:42:48 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 15:42:48.125409
2023-04-01 15:42:48 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:02.515999
2023-04-01 15:42:48 - 1501083698.py - <module> - 76 - INFO
start validate
valid 5: 100%|██████████| 1500/1500 [00:32<00:00, 46.34it/s, loss=2.6615982]
2023-04-01 15:43:20 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 15:43:20 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.095515 --> 2.061061).  Saving model ...


2023-04-01 15:43:20 - 1501083698.py - <module> - 6 - INFO
epoch 6
2023-04-01 15:43:20 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 15:43:20.895522
epoch-6: 100%|██████████| 9000/9000 [08:06<00:00, 18.51it/s, loss=2.4105928]
2023-04-01 15:51:27 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 15:51:27 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 6
2023-04-01 15:51:27 - 1501083698.py - <module> - 70 - INFO
epoch 6 finished, train loss = 2.0291473866
2023-04-01 15:51:27 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 15:51:27.559426
2023-04-01 15:51:27 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:06.663904
2023-04-01 15:51:27 - 1501083698.py - <module> - 76 - INFO
start validate
valid 6: 100%|██████████| 1500/1500 [00:30<00:00, 48.56it/s, loss=2.6563699]
2023-04-01 15:51:58 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 15:51:58 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.061061 --> 2.037657).  Saving model ...


2023-04-01 15:51:58 - 1501083698.py - <module> - 6 - INFO
epoch 7
2023-04-01 15:51:58 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 15:51:58.861428
epoch-7: 100%|██████████| 9000/9000 [08:08<00:00, 18.43it/s, loss=2.4290924]
2023-04-01 16:00:07 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 16:00:07 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 7
2023-04-01 16:00:07 - 1501083698.py - <module> - 70 - INFO
epoch 7 finished, train loss = 1.9952791929
2023-04-01 16:00:07 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 16:00:07.578934
2023-04-01 16:00:07 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:08.717506
2023-04-01 16:00:07 - 1501083698.py - <module> - 76 - INFO
start validate
valid 7: 100%|██████████| 1500/1500 [00:30<00:00, 48.42it/s, loss=2.6437562]
2023-04-01 16:00:38 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 16:00:38 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.037657 --> 2.020166).  Saving model ...


2023-04-01 16:00:38 - 1501083698.py - <module> - 6 - INFO
epoch 8
2023-04-01 16:00:38 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 16:00:38.963931
epoch-8: 100%|██████████| 9000/9000 [08:05<00:00, 18.54it/s, loss=2.3547471]
2023-04-01 16:08:44 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 16:08:44 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 8
2023-04-01 16:08:44 - 1501083698.py - <module> - 70 - INFO
epoch 8 finished, train loss = 1.9671690464
2023-04-01 16:08:44 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 16:08:44.688931
2023-04-01 16:08:44 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:05.725000
2023-04-01 16:08:44 - 1501083698.py - <module> - 76 - INFO
start validate
valid 8: 100%|██████████| 1500/1500 [00:30<00:00, 48.47it/s, loss=2.6440938]
2023-04-01 16:09:15 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 16:09:15 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.020166 --> 2.006954).  Saving model ...


2023-04-01 16:09:16 - 1501083698.py - <module> - 6 - INFO
epoch 9
2023-04-01 16:09:16 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 16:09:16.038932
epoch-9: 100%|██████████| 9000/9000 [08:13<00:00, 18.25it/s, loss=2.3619170]
2023-04-01 16:17:29 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 16:17:29 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 9
2023-04-01 16:17:29 - 1501083698.py - <module> - 70 - INFO
epoch 9 finished, train loss = 1.9438589811
2023-04-01 16:17:29 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 16:17:29.500624
2023-04-01 16:17:29 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:13.461692
2023-04-01 16:17:29 - 1501083698.py - <module> - 76 - INFO
start validate
valid 9: 100%|██████████| 1500/1500 [00:34<00:00, 43.79it/s, loss=2.6358354]
2023-04-01 16:18:03 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 16:18:03 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 

Validation loss decreased (2.006954 --> 1.995200).  Saving model ...


2023-04-01 16:18:04 - 1501083698.py - <module> - 6 - INFO
epoch 10
2023-04-01 16:18:04 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 16:18:04.163348
epoch-10: 100%|██████████| 9000/9000 [08:12<00:00, 18.28it/s, loss=2.3369248]
2023-04-01 16:26:16 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 16:26:16 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 10
2023-04-01 16:26:17 - 1501083698.py - <module> - 70 - INFO
epoch 10 finished, train loss = 1.9243437052
2023-04-01 16:26:17 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 16:26:17.006410
2023-04-01 16:26:17 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:12.843062
2023-04-01 16:26:17 - 1501083698.py - <module> - 76 - INFO
start validate
valid 10: 100%|██████████| 1500/1500 [00:32<00:00, 45.76it/s, loss=2.6370871]
2023-04-01 16:26:49 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 16:26:49 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.995200 --> 1.986294).  Saving model ...


2023-04-01 16:26:50 - 1501083698.py - <module> - 6 - INFO
epoch 11
2023-04-01 16:26:50 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 16:26:50.207284
epoch-11: 100%|██████████| 9000/9000 [08:15<00:00, 18.17it/s, loss=2.2538579]
2023-04-01 16:35:05 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 16:35:05 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 11
2023-04-01 16:35:05 - 1501083698.py - <module> - 70 - INFO
epoch 11 finished, train loss = 1.9068467617
2023-04-01 16:35:05 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 16:35:05.960288
2023-04-01 16:35:05 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:15.753004
2023-04-01 16:35:05 - 1501083698.py - <module> - 76 - INFO
start validate
valid 11: 100%|██████████| 1500/1500 [00:32<00:00, 46.74it/s, loss=2.6383166]
2023-04-01 16:35:38 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 16:35:38 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.986294 --> 1.979600).  Saving model ...


2023-04-01 16:35:38 - 1501083698.py - <module> - 6 - INFO
epoch 12
2023-04-01 16:35:38 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 16:35:38.464289
epoch-12: 100%|██████████| 9000/9000 [08:05<00:00, 18.54it/s, loss=2.2727230]
2023-04-01 16:43:43 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 16:43:43 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 12
2023-04-01 16:43:44 - 1501083698.py - <module> - 70 - INFO
epoch 12 finished, train loss = 1.8934389353
2023-04-01 16:43:44 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 16:43:44.302287
2023-04-01 16:43:44 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:05.837998
2023-04-01 16:43:44 - 1501083698.py - <module> - 76 - INFO
start validate
valid 12: 100%|██████████| 1500/1500 [00:30<00:00, 48.53it/s, loss=2.6364110]
2023-04-01 16:44:15 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 16:44:15 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.979600 --> 1.974746).  Saving model ...


2023-04-01 16:44:15 - 1501083698.py - <module> - 6 - INFO
epoch 13
2023-04-01 16:44:15 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 16:44:15.609287
epoch-13: 100%|██████████| 9000/9000 [08:08<00:00, 18.42it/s, loss=2.2721488]
2023-04-01 16:52:24 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 16:52:24 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 13
2023-04-01 16:52:24 - 1501083698.py - <module> - 70 - INFO
epoch 13 finished, train loss = 1.8809707165
2023-04-01 16:52:24 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 16:52:24.709334
2023-04-01 16:52:24 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:09.100047
2023-04-01 16:52:24 - 1501083698.py - <module> - 76 - INFO
start validate
valid 13: 100%|██████████| 1500/1500 [00:33<00:00, 45.33it/s, loss=2.6474183]
2023-04-01 16:52:57 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 16:52:57 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.974746 --> 1.969567).  Saving model ...


2023-04-01 16:52:58 - 1501083698.py - <module> - 6 - INFO
epoch 14
2023-04-01 16:52:58 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 16:52:58.206429
epoch-14: 100%|██████████| 9000/9000 [08:13<00:00, 18.22it/s, loss=2.2760606]
2023-04-01 17:01:12 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 17:01:12 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 14
2023-04-01 17:01:12 - 1501083698.py - <module> - 70 - INFO
epoch 14 finished, train loss = 1.8707462549
2023-04-01 17:01:12 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 17:01:12.464016
2023-04-01 17:01:12 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:14.257587
2023-04-01 17:01:12 - 1501083698.py - <module> - 76 - INFO
start validate
valid 14: 100%|██████████| 1500/1500 [00:32<00:00, 46.74it/s, loss=2.6493819]
2023-04-01 17:01:44 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 17:01:44 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.969567 --> 1.966209).  Saving model ...


2023-04-01 17:01:44 - 1501083698.py - <module> - 6 - INFO
epoch 15
2023-04-01 17:01:44 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 17:01:44.988018
epoch-15: 100%|██████████| 9000/9000 [08:13<00:00, 18.23it/s, loss=2.3298962]
2023-04-01 17:09:58 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 17:09:58 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 15
2023-04-01 17:09:59 - 1501083698.py - <module> - 70 - INFO
epoch 15 finished, train loss = 1.8631006479
2023-04-01 17:09:59 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 17:09:59.009145
2023-04-01 17:09:59 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:14.021127
2023-04-01 17:09:59 - 1501083698.py - <module> - 76 - INFO
start validate
valid 15: 100%|██████████| 1500/1500 [00:31<00:00, 46.98it/s, loss=2.6523092]
2023-04-01 17:10:30 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 17:10:30 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.966209 --> 1.963513).  Saving model ...


2023-04-01 17:10:31 - 1501083698.py - <module> - 6 - INFO
epoch 16
2023-04-01 17:10:31 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 17:10:31.371170
epoch-16: 100%|██████████| 9000/9000 [08:13<00:00, 18.25it/s, loss=2.2385042]
2023-04-01 17:18:44 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 17:18:44 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 16
2023-04-01 17:18:45 - 1501083698.py - <module> - 70 - INFO
epoch 16 finished, train loss = 1.8564864397
2023-04-01 17:18:45 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 17:18:45.035382
2023-04-01 17:18:45 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:13.664212
2023-04-01 17:18:45 - 1501083698.py - <module> - 76 - INFO
start validate
valid 16: 100%|██████████| 1500/1500 [00:31<00:00, 47.39it/s, loss=2.6567245]
2023-04-01 17:19:16 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 17:19:16 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.963513 --> 1.961372).  Saving model ...


2023-04-01 17:19:17 - 1501083698.py - <module> - 6 - INFO
epoch 17
2023-04-01 17:19:17 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 17:19:17.105384
epoch-17: 100%|██████████| 9000/9000 [08:06<00:00, 18.50it/s, loss=2.2690866]
2023-04-01 17:27:23 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 17:27:23 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 17
2023-04-01 17:27:23 - 1501083698.py - <module> - 70 - INFO
epoch 17 finished, train loss = 1.8526811600
2023-04-01 17:27:23 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 17:27:23.997777
2023-04-01 17:27:23 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:06.892393
2023-04-01 17:27:23 - 1501083698.py - <module> - 76 - INFO
start validate
valid 17: 100%|██████████| 1500/1500 [00:31<00:00, 48.37it/s, loss=2.6551199]
2023-04-01 17:27:55 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 17:27:55 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.961372 --> 1.959564).  Saving model ...


2023-04-01 17:27:55 - 1501083698.py - <module> - 6 - INFO
epoch 18
2023-04-01 17:27:55 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 17:27:55.420777
epoch-18: 100%|██████████| 9000/9000 [08:00<00:00, 18.72it/s, loss=2.2771008]
2023-04-01 17:35:56 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 17:35:56 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 18
2023-04-01 17:35:56 - 1501083698.py - <module> - 70 - INFO
epoch 18 finished, train loss = 1.8495301008
2023-04-01 17:35:56 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 17:35:56.518779
2023-04-01 17:35:56 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:01.098002
2023-04-01 17:35:56 - 1501083698.py - <module> - 76 - INFO
start validate
valid 18: 100%|██████████| 1500/1500 [00:30<00:00, 48.68it/s, loss=2.6621745]
2023-04-01 17:36:27 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 17:36:27 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

Validation loss decreased (1.959564 --> 1.958286).  Saving model ...


2023-04-01 17:36:27 - 1501083698.py - <module> - 6 - INFO
epoch 19
2023-04-01 17:36:27 - 1501083698.py - <module> - 8 - INFO
time: 2023-04-01 17:36:27.729778
epoch-19: 100%|██████████| 9000/9000 [08:00<00:00, 18.71it/s, loss=2.2766562]
2023-04-01 17:44:28 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 17:44:28 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 19
2023-04-01 17:44:29 - 1501083698.py - <module> - 70 - INFO
epoch 19 finished, train loss = 1.8476686478
2023-04-01 17:44:29 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 17:44:29.092851
2023-04-01 17:44:29 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:01.363073
2023-04-01 17:44:29 - 1501083698.py - <module> - 76 - INFO
start validate
valid 19: 100%|██████████| 1500/1500 [00:31<00:00, 48.00it/s, loss=2.6621597]
2023-04-01 17:45:00 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 17:45:00 - 1501083698.py - <module> - 96 - INFO
valid finished, valid lo

EarlyStopping counter: 1 out of 2


epoch-20: 100%|██████████| 9000/9000 [08:02<00:00, 18.64it/s, loss=2.2836251]
2023-04-01 17:53:03 - 1501083698.py - <module> - 60 - INFO
train step = 8999
2023-04-01 17:53:03 - 1501083698.py - <module> - 64 - INFO
saving model for epoch 20
2023-04-01 17:53:03 - 1501083698.py - <module> - 70 - INFO
epoch 20 finished, train loss = 1.8477418423
2023-04-01 17:53:03 - 1501083698.py - <module> - 73 - INFO
time: 2023-04-01 17:53:03.677853
2023-04-01 17:53:03 - 1501083698.py - <module> - 74 - INFO
time for one epoch: 0:08:03.327002
2023-04-01 17:53:03 - 1501083698.py - <module> - 76 - INFO
start validate
valid 20: 100%|██████████| 1500/1500 [00:30<00:00, 48.42it/s, loss=2.6576827]
2023-04-01 17:53:34 - 1501083698.py - <module> - 93 - INFO
valid step = 1499
2023-04-01 17:53:34 - 1501083698.py - <module> - 96 - INFO
valid finished, valid loss = 1.9584904909
2023-04-01 17:53:34 - 1501083698.py - <module> - 99 - INFO
Early stopping
2023-04-01 17:53:34 - 1501083698.py - <module> - 104 - INFO
traini

EarlyStopping counter: 2 out of 2
