In [1]:
from transformers import GPT2Config, GPT2LMHeadModel
import torch
from torch.utils.data.dataset import Dataset
from torch.utils.data.dataloader import DataLoader
from transformers import BertTokenizer, get_linear_schedule_with_warmup
from tqdm import tqdm
from datetime import datetime
import os
import numpy as np
import logging
import random
from torch.utils.tensorboard import SummaryWriter

In [2]:
def seed_everything(seed: int = 42):
    """Util to make training reproducible"""
    random.seed(seed)

    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    if os.getenv("CUBLAS_WORKSPACE_CONFIG") is not None:
        torch.use_deterministic_algorithms(True)
        os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def worker_init(worked_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

def set_logger(path):

    logger = logging.getLogger()
    handler = logging.FileHandler(path + "/train_log.txt")
    logger.setLevel(level=logging.INFO)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "%(asctime)s - %(filename)s - %(funcName)s - %(lineno)s - %(levelname)s\n%(message)s",
        "%Y-%m-%d %H:%M:%S",
    )
    handler.setFormatter(formatter)
    console = logging.StreamHandler()
    console.setFormatter(formatter)
    logger.addHandler(handler)
    logger.addHandler(console)


class Chinese_Medical_DS(Dataset):
    def __init__(self, path, tokenizer, max_len=1024):
        self.path = path
        sentence = []
        with open(self.path, 'r', encoding='utf-8') as f:
            for line in f:
                sen_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(line.strip()))
                full_sen = []
                full_sen.append(tokenizer.convert_tokens_to_ids('[MASK]'))
                full_sen.extend(sen_ids)
                full_sen.append(tokenizer.convert_tokens_to_ids('[CLS]'))
                if len(full_sen) <= max_len:
                    sentence.append(full_sen)
        self.data = sentence
        
    # need to overload
    def __len__(self):
        return len(self.data)

    # need to overload
    def __getitem__(self, idx):
        input = self.data[idx]
        target = input
        return input, target
    
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""

    def __init__(self, save_path, patience=2, verbose=True, delta=0):
        """
        Args:
            save_path : 模型保存文件夹
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.save_path = save_path
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Saves model when validation loss decrease."""
        if self.verbose:
            print(
                f"Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ..."
            )
        model_to_save = model.module if hasattr(model, 'module') else model
        model_to_save.save_pretrained(self.save_path + 'best_model')
        
        # path = os.path.join(self.save_path, "best_network.pth")
        # torch.save(model.state_dict(), path)  # 这里会存储迄今最优模型的参数
        self.val_loss_min = val_loss

In [3]:
seed_everything()
tok_path = '..\\Raw_GPT2\\vocab.txt'
pretrain_model_path = "..\\Raw_GPT2\\"
output_dir = "model\\"

epochs = 50
warmup_steps = 1000
lr = 1e-5
gradient_accumulation = 18
max_grad_norm = 1.0
log_step = 10000
set_logger(output_dir)
logger = logging.getLogger(__name__)


In [4]:
tokenizer = BertTokenizer(vocab_file=tok_path)
model = GPT2LMHeadModel.from_pretrained(pretrain_model_path)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
logger.info('using device:{}'.format(device))
model.train()
model.to(device)
logger.info(model)

2023-03-27 17:06:33 - 945538542.py - <module> - 4 - INFO
using device:cuda
2023-03-27 17:06:33 - 945538542.py - <module> - 7 - INFO
GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(21128, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1): GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c

In [5]:
train_dataset = Chinese_Medical_DS("..\\Data\\train.txt", tokenizer)
train_dataloader = DataLoader(dataset=train_dataset, worker_init_fn=worker_init)
valid_dataset = Chinese_Medical_DS("..\\Data\\valid.txt", tokenizer)
valid_dataloader = DataLoader(dataset=valid_dataset, worker_init_fn=worker_init)
logger.info("len(train_dataloader), len(valid_dataloader) = {}, {}".format(len(train_dataloader), len(valid_dataloader)))

KeyboardInterrupt: 

In [None]:
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps,
                                                          num_training_steps=len(train_dataloader))
tb_path = output_dir + "/tb"
if not os.path.exists(tb_path):
    os.mkdir(tb_path)
writer = SummaryWriter(tb_path)

In [None]:
running_loss = 0
early_stopping = EarlyStopping(output_dir)
train_step_per_epoch = len(train_dataloader)
valid_step_per_epoch = len(valid_dataloader)
for epoch in range(epochs):
    logger.info('epoch {}'.format(epoch + 1))
    now = datetime.now()
    logger.info('time: {}'.format(now))
    model.train()
    train_pbar = tqdm(train_dataloader)
    all_train_loss = 0.0
    train_pbar.set_description('epoch-' + str(epoch + 1))
    for step, (input, label) in enumerate(train_pbar):
        input_ids = torch.tensor(label).long().to(device)
        label_ids = torch.tensor(input).long().to(device)

        #  forward pass
        outputs = model(input_ids=input_ids, labels=label_ids)
        loss, logits = outputs[:2]
        
        if gradient_accumulation > 1:
            loss = loss / gradient_accumulation
            
        #  loss backward
        # if fp16:
        #     with amp.scale_loss(loss, optimizer) as scaled_loss:
        #         scaled_loss.backward()
        #         torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), max_grad_norm)
        # else:
        #     loss.backward()
        #     torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        
        loss.backward()
        loss = loss.detach()
        all_train_loss += loss
        
        writer.add_scalar('loss/train_step_loss', scalar_value=loss * gradient_accumulation, global_step=epoch * train_step_per_epoch+step)
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        #  optimizer step
        if (step + 1) % gradient_accumulation == 0:
            running_loss += loss.item()
            optimizer.step()
            optimizer.zero_grad()
            scheduler.step()
        if (step + 1) % log_step == 0:
            logger.info('now time: {}:{}. Step {} of epoch {}, loss {}'.format(
                datetime.now().hour,
                datetime.now().minute,
                (step + 1) // gradient_accumulation,
                epoch + 1,
                running_loss / log_step))
            running_loss = 0
        
        train_pbar.set_postfix({'loss': '{:.7f}'.format(loss*gradient_accumulation)})
        
    logger.info('train step = {}'.format(step))
    all_train_loss = all_train_loss / (step + 1)

    writer.add_scalar('loss/train_epoch_loss', scalar_value=all_train_loss * gradient_accumulation, global_step=epoch + 1)
    logger.info('saving model for epoch {}'.format(epoch + 1))
    if not os.path.exists(output_dir + 'model_epoch{}'.format(epoch + 1)):
        os.mkdir(output_dir + 'model_epoch{}'.format(epoch + 1))
    model_to_save = model.module if hasattr(model, 'module') else model
    model_to_save.save_pretrained(output_dir + 'model_epoch{}'.format(epoch + 1))

    logger.info('epoch {} finished, train loss = {:.10f}'.format(epoch + 1, all_train_loss * gradient_accumulation))

    then = datetime.now()
    logger.info('time: {}'.format(then))
    logger.info('time for one epoch: {}'.format(then - now))
    
    logger.info('start validate')
    model.eval()
    all_valid_loss = 0.0
    valid_pbar = tqdm(valid_dataloader)
    valid_pbar.set_description('valid ' + str(epoch + 1))
    for step, (input, label) in enumerate(valid_pbar):
        input_ids = torch.tensor(label).long().to(device)
        label_ids = torch.tensor(input).long().to(device)

        #  forward pass
        outputs = model(input_ids=input_ids, labels=label_ids)
        loss = outputs[0].detach()
        writer.add_scalar('loss/valid_step_loss', scalar_value=loss, global_step=epoch * valid_step_per_epoch + step)
        all_valid_loss += loss
        valid_pbar.set_postfix({'loss': '{:.7f}'.format(loss)})
    
    logger.info('valid step = {}'.format(step))
    all_valid_loss = all_valid_loss / (step + 1)
    writer.add_scalar('loss/valid_epoch_loss', scalar_value=all_valid_loss, global_step=epoch+1)
    logger.info('valid finished, valid loss = {:.10f}'.format(all_valid_loss))
    early_stopping(all_valid_loss, model)
    if early_stopping.early_stop:
        logger.info("Early stopping")
        break

writer.close()    

logger.info('training finished')
if not os.path.exists(output_dir + 'final_model'):
    os.mkdir(output_dir + 'final_model')
model_to_save = model.module if hasattr(model, 'module') else model
model_to_save.save_pretrained(output_dir + 'final_model')

2023-03-26 09:27:33 - 833870943.py - <module> - 6 - INFO
epoch 1
2023-03-26 09:27:33 - 833870943.py - <module> - 8 - INFO
time: 2023-03-26 09:27:33.107631
epoch-1:  11%|█         | 9998/90000 [09:23<1:15:33, 17.65it/s, loss=1.2352529]2023-03-26 09:36:56 - 833870943.py - <module> - 48 - INFO
now time: 9:36. Step 555 of epoch 1, loss 0.007813431648910046
epoch-1:  22%|██▏       | 19998/90000 [18:38<1:09:17, 16.84it/s, loss=1.8927422]2023-03-26 09:46:11 - 833870943.py - <module> - 48 - INFO
now time: 9:46. Step 1111 of epoch 1, loss 0.006947780641168356
epoch-1:  33%|███▎      | 29999/90000 [27:28<53:40, 18.63it/s, loss=1.6036086]  2023-03-26 09:55:02 - 833870943.py - <module> - 48 - INFO
now time: 9:55. Step 1666 of epoch 1, loss 0.006314156853407621
epoch-1:  44%|████▍     | 39999/90000 [36:24<44:11, 18.86it/s, loss=2.2541289]  2023-03-26 10:03:57 - 833870943.py - <module> - 48 - INFO
now time: 10:3. Step 2222 of epoch 1, loss 0.006171812727302313
epoch-1:  56%|█████▌    | 49999/90000 [

Validation loss decreased (inf --> 1.987409).  Saving model ...


2023-03-26 10:54:16 - 833870943.py - <module> - 6 - INFO
epoch 2
2023-03-26 10:54:16 - 833870943.py - <module> - 8 - INFO
time: 2023-03-26 10:54:16.460477
epoch-2:  11%|█         | 9999/90000 [09:14<1:13:13, 18.21it/s, loss=0.3034219]2023-03-26 11:03:30 - 833870943.py - <module> - 48 - INFO
now time: 11:3. Step 555 of epoch 2, loss 0.006110611184686423
epoch-2:  22%|██▏       | 19998/90000 [18:22<1:07:57, 17.17it/s, loss=1.5156682]2023-03-26 11:12:39 - 833870943.py - <module> - 48 - INFO
now time: 11:12. Step 1111 of epoch 2, loss 0.005857782790530473
epoch-2:  33%|███▎      | 29999/90000 [27:27<53:15, 18.78it/s, loss=1.3645127]  2023-03-26 11:21:44 - 833870943.py - <module> - 48 - INFO
now time: 11:21. Step 1666 of epoch 2, loss 0.005708108245022595
epoch-2:  44%|████▍     | 39998/90000 [36:51<47:45, 17.45it/s, loss=2.0279479]  2023-03-26 11:31:07 - 833870943.py - <module> - 48 - INFO
now time: 11:31. Step 2222 of epoch 2, loss 0.005530866366997361
epoch-2:  56%|█████▌    | 49998/9000

Validation loss decreased (1.987409 --> 1.875364).  Saving model ...


2023-03-26 12:21:17 - 833870943.py - <module> - 6 - INFO
epoch 3
2023-03-26 12:21:17 - 833870943.py - <module> - 8 - INFO
time: 2023-03-26 12:21:17.165731
epoch-3:  11%|█         | 9999/90000 [08:59<1:13:49, 18.06it/s, loss=0.2567370]2023-03-26 12:30:16 - 833870943.py - <module> - 48 - INFO
now time: 12:30. Step 555 of epoch 3, loss 0.005662889221962541
epoch-3:  22%|██▏       | 19998/90000 [17:55<1:06:23, 17.57it/s, loss=1.4556347]2023-03-26 12:39:12 - 833870943.py - <module> - 48 - INFO
now time: 12:39. Step 1111 of epoch 3, loss 0.0055732601010240615
epoch-3:  33%|███▎      | 29997/90000 [26:47<52:36, 19.01it/s, loss=1.1073730]  2023-03-26 12:48:04 - 833870943.py - <module> - 48 - INFO
now time: 12:48. Step 1666 of epoch 3, loss 0.005502921638078987
epoch-3:  44%|████▍     | 39997/90000 [35:43<47:17, 17.62it/s, loss=1.9999142]  2023-03-26 12:57:00 - 833870943.py - <module> - 48 - INFO
now time: 12:57. Step 2222 of epoch 3, loss 0.005262369329482317
epoch-3:  56%|█████▌    | 49998/90

Validation loss decreased (1.875364 --> 1.814677).  Saving model ...


2023-03-26 13:47:35 - 833870943.py - <module> - 6 - INFO
epoch 4
2023-03-26 13:47:35 - 833870943.py - <module> - 8 - INFO
time: 2023-03-26 13:47:35.491001
epoch-4:  11%|█         | 9999/90000 [08:52<1:10:51, 18.82it/s, loss=0.1929289]2023-03-26 13:56:28 - 833870943.py - <module> - 48 - INFO
now time: 13:56. Step 555 of epoch 4, loss 0.005457634353544563
epoch-4:  22%|██▏       | 19998/90000 [17:38<1:05:16, 17.88it/s, loss=1.4726558]2023-03-26 14:05:13 - 833870943.py - <module> - 48 - INFO
now time: 14:5. Step 1111 of epoch 4, loss 0.005402944914437831
epoch-4:  33%|███▎      | 29999/90000 [26:18<51:50, 19.29it/s, loss=0.9365353]  2023-03-26 14:13:54 - 833870943.py - <module> - 48 - INFO
now time: 14:13. Step 1666 of epoch 4, loss 0.0053678151657804846
epoch-4:  44%|████▍     | 39999/90000 [35:03<43:06, 19.33it/s, loss=1.9493117]  2023-03-26 14:22:39 - 833870943.py - <module> - 48 - INFO
now time: 14:22. Step 2222 of epoch 4, loss 0.005081958138756454
epoch-4:  56%|█████▌    | 49998/900

Validation loss decreased (1.814677 --> 1.776170).  Saving model ...


2023-03-26 15:11:36 - 833870943.py - <module> - 6 - INFO
epoch 5
2023-03-26 15:11:36 - 833870943.py - <module> - 8 - INFO
time: 2023-03-26 15:11:36.828352
epoch-5:  11%|█         | 9999/90000 [08:58<1:10:00, 19.05it/s, loss=0.2127250]2023-03-26 15:20:35 - 833870943.py - <module> - 48 - INFO
now time: 15:20. Step 555 of epoch 5, loss 0.005325427847821265
epoch-5:  22%|██▏       | 19998/90000 [17:54<1:04:23, 18.12it/s, loss=1.4037279]2023-03-26 15:29:31 - 833870943.py - <module> - 48 - INFO
now time: 15:29. Step 1111 of epoch 5, loss 0.005286604707688093
epoch-5:  33%|███▎      | 29998/90000 [26:45<52:39, 18.99it/s, loss=0.8346838]  2023-03-26 15:38:22 - 833870943.py - <module> - 48 - INFO
now time: 15:38. Step 1666 of epoch 5, loss 0.0052620055502280595
epoch-5:  44%|████▍     | 39999/90000 [35:41<46:29, 17.92it/s, loss=1.9218628]  2023-03-26 15:47:18 - 833870943.py - <module> - 48 - INFO
now time: 15:47. Step 2222 of epoch 5, loss 0.004953988527692854
epoch-5:  56%|█████▌    | 49998/90

Validation loss decreased (1.776170 --> 1.748206).  Saving model ...


2023-03-26 16:36:30 - 833870943.py - <module> - 6 - INFO
epoch 6
2023-03-26 16:36:30 - 833870943.py - <module> - 8 - INFO
time: 2023-03-26 16:36:30.360279
epoch-6:  11%|█         | 9999/90000 [08:59<1:08:51, 19.36it/s, loss=0.1722431]2023-03-26 16:45:30 - 833870943.py - <module> - 48 - INFO
now time: 16:45. Step 555 of epoch 6, loss 0.005232632712740451
epoch-6:  22%|██▏       | 19998/90000 [17:58<1:05:58, 17.68it/s, loss=1.4022651]2023-03-26 16:54:28 - 833870943.py - <module> - 48 - INFO
now time: 16:54. Step 1111 of epoch 6, loss 0.0051908160829916596
epoch-6:  33%|███▎      | 29997/90000 [26:50<52:52, 18.91it/s, loss=0.7222533]  2023-03-26 17:03:21 - 833870943.py - <module> - 48 - INFO
now time: 17:3. Step 1666 of epoch 6, loss 0.005182975849322975
epoch-6:  44%|████▍     | 39998/90000 [35:45<47:52, 17.41it/s, loss=1.9361275]  2023-03-26 17:12:15 - 833870943.py - <module> - 48 - INFO
now time: 17:12. Step 2222 of epoch 6, loss 0.004857331681251526
epoch-6:  56%|█████▌    | 49999/900

Validation loss decreased (1.748206 --> 1.727814).  Saving model ...


2023-03-26 18:01:28 - 833870943.py - <module> - 6 - INFO
epoch 7
2023-03-26 18:01:28 - 833870943.py - <module> - 8 - INFO
time: 2023-03-26 18:01:28.975441
epoch-7:  11%|█         | 9999/90000 [09:00<1:11:06, 18.75it/s, loss=0.1780500]2023-03-26 18:10:29 - 833870943.py - <module> - 48 - INFO
now time: 18:10. Step 555 of epoch 7, loss 0.0051687027758918706
epoch-7:  22%|██▏       | 19998/90000 [17:57<1:07:44, 17.22it/s, loss=1.4370263]2023-03-26 18:19:26 - 833870943.py - <module> - 48 - INFO
now time: 18:19. Step 1111 of epoch 7, loss 0.005121685974579304
epoch-7:  33%|███▎      | 29999/90000 [26:49<52:27, 19.06it/s, loss=0.5652030]  2023-03-26 18:28:18 - 833870943.py - <module> - 48 - INFO
now time: 18:28. Step 1666 of epoch 7, loss 0.005115795264020563
epoch-7:  44%|████▍     | 39999/90000 [35:44<43:11, 19.30it/s, loss=1.9016135]  2023-03-26 18:37:13 - 833870943.py - <module> - 48 - INFO
now time: 18:37. Step 2222 of epoch 7, loss 0.004778570753429085
epoch-7:  56%|█████▌    | 49999/90

Validation loss decreased (1.727814 --> 1.711887).  Saving model ...


2023-03-26 19:26:27 - 833870943.py - <module> - 6 - INFO
epoch 8
2023-03-26 19:26:27 - 833870943.py - <module> - 8 - INFO
time: 2023-03-26 19:26:27.370592
epoch-8:  11%|█         | 9999/90000 [08:59<1:10:24, 18.94it/s, loss=0.1960545]2023-03-26 19:35:26 - 833870943.py - <module> - 48 - INFO
now time: 19:35. Step 555 of epoch 8, loss 0.005106425412930548
epoch-8:  22%|██▏       | 19998/90000 [17:54<1:05:26, 17.83it/s, loss=1.3948320]2023-03-26 19:44:21 - 833870943.py - <module> - 48 - INFO
now time: 19:44. Step 1111 of epoch 8, loss 0.0050581348762847485
epoch-8:  33%|███▎      | 29999/90000 [26:45<51:57, 19.24it/s, loss=0.5386490]  2023-03-26 19:53:12 - 833870943.py - <module> - 48 - INFO
now time: 19:53. Step 1666 of epoch 8, loss 0.005054469848051667
epoch-8:  44%|████▍     | 39999/90000 [35:40<44:50, 18.58it/s, loss=1.9366363]  2023-03-26 20:02:07 - 833870943.py - <module> - 48 - INFO
now time: 20:2. Step 2222 of epoch 8, loss 0.004713246468268335
epoch-8:  56%|█████▌    | 49998/900

Validation loss decreased (1.711887 --> 1.698062).  Saving model ...


2023-03-26 20:51:24 - 833870943.py - <module> - 6 - INFO
epoch 9
2023-03-26 20:51:24 - 833870943.py - <module> - 8 - INFO
time: 2023-03-26 20:51:24.906402
epoch-9:  11%|█         | 9998/90000 [08:58<1:10:05, 19.02it/s, loss=0.1822250]2023-03-26 21:00:23 - 833870943.py - <module> - 48 - INFO
now time: 21:0. Step 555 of epoch 9, loss 0.005061278734076768
epoch-9:  22%|██▏       | 19999/90000 [17:54<1:12:26, 16.10it/s, loss=1.3826845]2023-03-26 21:09:19 - 833870943.py - <module> - 48 - INFO
now time: 21:9. Step 1111 of epoch 9, loss 0.005001440432015806
epoch-9:  33%|███▎      | 29997/90000 [26:45<53:18, 18.76it/s, loss=0.5471426]  2023-03-26 21:18:10 - 833870943.py - <module> - 48 - INFO
now time: 21:18. Step 1666 of epoch 9, loss 0.005000369258970022
epoch-9:  44%|████▍     | 39999/90000 [35:40<43:44, 19.05it/s, loss=1.8413776]  2023-03-26 21:27:05 - 833870943.py - <module> - 48 - INFO
now time: 21:27. Step 2222 of epoch 9, loss 0.0046660763918422166
epoch-9:  56%|█████▌    | 49998/9000

Validation loss decreased (1.698062 --> 1.687524).  Saving model ...


2023-03-26 22:17:37 - 833870943.py - <module> - 6 - INFO
epoch 10
2023-03-26 22:17:37 - 833870943.py - <module> - 8 - INFO
time: 2023-03-26 22:17:37.765235
epoch-10:  11%|█         | 9999/90000 [08:49<1:12:05, 18.49it/s, loss=0.1530405]2023-03-26 22:26:26 - 833870943.py - <module> - 48 - INFO
now time: 22:26. Step 555 of epoch 10, loss 0.005020985192619264
epoch-10:  22%|██▏       | 19999/90000 [17:51<1:07:32, 17.27it/s, loss=1.3855948]2023-03-26 22:35:29 - 833870943.py - <module> - 48 - INFO
now time: 22:35. Step 1111 of epoch 10, loss 0.004964002070203423
epoch-10:  33%|███▎      | 29999/90000 [26:39<52:19, 19.11it/s, loss=0.4580060]  2023-03-26 22:44:17 - 833870943.py - <module> - 48 - INFO
now time: 22:44. Step 1666 of epoch 10, loss 0.004969878990389406
epoch-10:  44%|████▍     | 39999/90000 [35:29<44:06, 18.90it/s, loss=1.8191639]  2023-03-26 22:53:07 - 833870943.py - <module> - 48 - INFO
now time: 22:53. Step 2222 of epoch 10, loss 0.004614406159799546
epoch-10:  56%|█████▌    |