In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn

# 1 数据读入

In [2]:
# Load the complete pipe-delimited corpus, then summarise the character length
# of every entry in the `content` column.
data_path = './proceed/train_data_all.csv'
data = pd.read_csv(data_path, sep='|', encoding='utf-8')
content_char_counts = data['content'].str.len()
content_char_counts.describe()

count    24992.000000
mean       419.802937
std        121.761374
min         11.000000
25%        330.000000
50%        512.000000
75%        512.000000
max        513.000000
Name: content, dtype: float64

## 构造训练集和测试集

In [11]:
# Shuffle the rows with a fixed seed. The split below samples with a fixed
# seed too, but on an unseeded shuffle it would still pick different rows on
# every run; seeding both makes the files written to disk reproducible.
df = sklearn.utils.shuffle(data, random_state=0)
# 90% of the rows go to training; whatever is left becomes the dev set.
train_data = df.sample(frac=0.9, random_state=0, axis=0)
dev_data = df.drop(train_data.index)
train_data_path = './proceed/train_data.csv'
dev_data_path = './proceed/dev_data.csv'
# Column labels written as the header row of both output files.
split_columns = ['id', 'content', 'abstract']
# Write the training split to disk.
train_data.to_csv(train_data_path, index=False, encoding='utf-8', sep='|',
                  header=split_columns)
# Write the dev split to disk.
dev_data.to_csv(dev_data_path, index=False, encoding='utf-8', sep='|',
                header=split_columns)

In [12]:
# Print the content-length statistics of each split under its own banner.
for banner, split_frame in (
        ("================train===============", train_data),
        ("=================dev=================", dev_data)):
    print(banner)
    print(split_frame['content'].str.len().describe())
print("====================================")

count    22492.000000
mean       419.649475
std        121.892133
min         11.000000
25%        330.000000
50%        512.000000
75%        512.000000
max        513.000000
Name: content, dtype: float64
count    2500.000000
mean      421.183600
std       120.593983
min        18.000000
25%       328.000000
50%       512.000000
75%       512.000000
max       513.000000
Name: content, dtype: float64


In [13]:
from paddlenlp.datasets import load_dataset
def read_from_csv(data_path):
    """Yield one ``{'content', 'abstract'}`` example per usable row of a CSV file.

    The file is read as UTF-8 with ``|`` as the field separator and must
    contain ``content`` and ``abstract`` columns.  Rows in which either of the
    two columns is missing (parsed by pandas as NaN) are skipped, so every
    yielded value is the text read from the file rather than a float NaN.

    Args:
        data_path: Path of the pipe-delimited CSV file to read.

    Yields:
        dict: ``{'content': ..., 'abstract': ...}`` for each complete row, in
        file order.
    """
    frame = pd.read_csv(data_path, encoding='utf-8', sep='|')
    # Drop incomplete rows up front instead of handing NaN to later stages.
    usable = frame.dropna(subset=['content', 'abstract'])
    # Iterating the two columns directly avoids building a Series per row,
    # which is what iterrows() does.
    for content, abstract in zip(usable['content'], usable['abstract']):
        yield {'content': content, 'abstract': abstract}

# `data_path` is the argument handed on to the reader function (read_from_csv).
train_dataset = load_dataset(
    read_from_csv,
    data_path=train_data_path,
    lazy=False,
    split="train",
)
dev_dataset = load_dataset(
    read_from_csv,
    data_path=dev_data_path,
    lazy=False,
    split="dev",
)
# Report how many examples each split contains (train first, then dev).
for dataset_split in (train_dataset, dev_dataset):
    print(len(dataset_split))



22501
2500


In [37]:
from paddlenlp.transformers import PegasusForConditionalGeneration, PegasusChineseTokenizer
from paddlenlp.transformers import LinearDecayWithWarmup
from paddle.io import BatchSampler, DistributedBatchSampler, DataLoader
from paddlenlp.data import DataCollatorForSeq2Seq

# Pretrained checkpoint shared by the tokenizer and the model, named once so
# the two can never drift apart.
# Alternative checkpoint noted during experimentation:
#   'PaddlePaddle/Randeng-Pegasus-238M-Summary-Chinese-SSTIA'
pretrained_model_name = 'IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chinese'

# Tokenizer: splits text into tokens and maps each token to its id.
tokenizer = PegasusChineseTokenizer.from_pretrained(pretrained_model_name)
# Model initialised from the same checkpoint.
model = PegasusForConditionalGeneration.from_pretrained(pretrained_model_name)

# Collator that assembles individual examples into a batch and pads them.
batchify_fn = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

[32m[2023-06-04 23:54:15,523] [    INFO][0m - Already cached /home/ubuntu/.paddlenlp/models/IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chinese/vocab.txt[0m
[32m[2023-06-04 23:54:15,524] [    INFO][0m - Already cached /home/ubuntu/.paddlenlp/models/IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chinese/added_tokens.json[0m
[32m[2023-06-04 23:54:15,525] [    INFO][0m - Already cached /home/ubuntu/.paddlenlp/models/IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chinese/special_tokens_map.json[0m
[32m[2023-06-04 23:54:15,526] [    INFO][0m - Already cached /home/ubuntu/.paddlenlp/models/IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chinese/tokenizer_config.json[0m
[32m[2023-06-04 23:54:15,584] [    INFO][0m - Already cached /home/ubuntu/.paddlenlp/models/IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chinese/model_state.pdparams[0m
[32m[2023-06-04 23:54:15,585] [    INFO][0m - Already cached /home/ubuntu/.paddlenlp/models/IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chinese/model_config.json[0m


# 训练

In [38]:
#安装需要的相关库
import os
import json
import argparse
import random
import time
import paddle
import paddlenlp
import distutils.util
from pprint import pprint
from functools import partial
from tqdm import tqdm
import numpy as np
import math
from datasets import load_dataset
import contextlib
from rouge import Rouge
from visualdl import LogWriter


import pandas as pd 
from paddlenlp.datasets import MapDataset
import paddle.nn as nn
from paddlenlp.utils.log import logger
from paddlenlp.metrics import BLEU
from paddlenlp.data import Tuple, Pad

In [39]:
# convert_example maps the source text and its summary to integer token ids
# and attaches the summary ids as `labels`.
def convert_example(example, text_column, summary_column, tokenizer,
                    max_source_length, max_target_length):
    """Build the model inputs for a single example.

    Args:
        example: Mapping holding the raw source text and its summary.
        text_column: Key of the source text inside ``example``.
        summary_column: Key of the summary inside ``example``.
        tokenizer: Callable tokenizer; it is invoked once for the source text
            and once for the summary, without padding and with truncation.
        max_source_length: ``max_length`` passed when tokenizing the source.
        max_target_length: ``max_length`` passed when tokenizing the summary.

    Returns:
        The tokenizer output for the source text (attention mask requested),
        extended with a ``"labels"`` entry holding the summary's ``input_ids``.
    """
    source_text = example[text_column]
    summary_text = example[summary_column]

    # Tokenize the source; padding is left to the batch collator.
    encoded = tokenizer(source_text,
                        max_length=max_source_length,
                        padding=False,
                        truncation=True,
                        return_attention_mask=True)
    # Tokenize the summary the same way (no attention mask requested).
    summary_encoding = tokenizer(summary_text,
                                 max_length=max_target_length,
                                 padding=False,
                                 truncation=True)

    # The summary ids become the labels; per the original note they are
    # shifted later by DataCollatorForSeq2Seq.
    encoded["labels"] = summary_encoding["input_ids"]
    return encoded

In [40]:
# Because of the pretrained model's limit, the maximum text length is set to 512.
# Maximum length of the source text (passed to the tokenizer as max_length)
max_source_length = 512
# Maximum length of the summary
max_target_length = 160
# Minimum length of the summary
min_target_length = 0

In [41]:
# Bind the fixed arguments of convert_example with functools.partial so the
# dataset `map` call only has to supply the example itself. `map` then replaces
# the raw text of every example with the ids produced by the tokenizer.
trans_func = partial(convert_example,
                     text_column='content',
                     summary_column='abstract',
                     tokenizer=tokenizer,
                     max_source_length=max_source_length,
                     max_target_length=max_target_length)

# Convert the training and dev splits.
# NOTE: both names are rebound to the converted datasets, so re-running this
# cell without re-creating the datasets first would apply the conversion to
# examples that have already been converted.
train_dataset = train_dataset.map(trans_func,
                                  batched=False,
                                  lazy=False)

dev_dataset = dev_dataset.map(trans_func,
                              batched=False,
                              lazy=False)

# Print the first 2 converted training examples, then stop; there is no need
# to keep iterating over the rest of the dataset once both are shown.
for idx, example in enumerate(train_dataset):
    if idx >= 2:
        break
    print(example)

{'input_ids': [1117, 131, 1266, 230, 21328, 4054, 13055, 1266, 13048, 1266, 221, 1909, 1857, 5226, 1608, 1909, 26198, 1909, 15944, 39771, 615, 6659, 36230, 791, 10961, 1266, 3399, 4813, 221, 12695, 27871, 1101, 6445, 12695, 1909, 2349, 2355, 791, 1266, 3399, 4659, 23162, 1266, 1608, 21933, 21018, 3399, 2274, 615, 197, 2334, 333, 6659, 36230, 791, 10961, 505, 1608, 1909, 26198, 1909, 10838, 2421, 230, 299, 4919, 2587, 2587, 275, 696, 1227, 13984, 30698, 4920, 2274, 18423, 1909, 25537, 38129, 23167, 1117, 131, 1477, 791, 14093, 1477, 1101, 709, 791, 14093, 5034, 7018, 200, 6800, 39223, 15203, 2274, 22617, 2334, 6683, 10753, 5034, 2274, 18817, 221, 44582, 1266, 3399, 1909, 1608, 21933, 3290, 15944, 1418, 131, 4656, 19164, 23548, 676, 8398, 1979, 435, 1454, 15214, 15214, 1909, 4919, 346, 18739, 23476, 696, 275, 791, 1266, 1909, 1608, 12268, 823, 4599, 1608, 21933, 3851, 1078, 3299, 1117, 131, 4503, 1909, 15944, 15203, 2274, 22617, 2334, 520, 696, 520, 2274, 823, 36566, 3399, 1909, 3441, 33

In [42]:
# Number of examples per batch, shared by the training and dev loaders.
batch_size = 8

# Training loader. shuffle=True so that batches are not always drawn in the
# same fixed order of the training file.
train_data_loader = paddle.io.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    return_list=True,
    collate_fn=batchify_fn)

# Dev batches keep the dataset order (shuffle=False).
dev_batch_sampler = BatchSampler(
    dataset=dev_dataset,
    batch_size=batch_size,
    shuffle=False)

# Dev loader built on top of the sampler above.
dev_data_loader = DataLoader(dataset=dev_dataset,
                             batch_sampler=dev_batch_sampler,
                             num_workers=0,
                             collate_fn=batchify_fn,
                             return_list=True)

## 训练配置

In [45]:
# Learning-rate warm-up proportion
warmup = 0.02
# Learning rate
learning_rate = 5e-5
# Number of training epochs
num_epochs = 50
# Total number of training steps: batches per epoch times epochs
num_training_steps = num_epochs * len(train_data_loader)
# AdamW epsilon
adam_epsilon = 1e-6
# AdamW weight decay
weight_decay = 0.01
# During training, write a log line every `log_steps` steps
log_steps = 100
# During training, evaluate the model every `eval_steps` steps
eval_steps = 10000
# Use SSTIA: switch it on for the model and set its mix ratio
model.use_SSTIA = True
model.mix_ratio = 0.3


# Directory where trained models are saved
output_dir = 'checkpoints'
# Beam size for decoding
num_beams = 4

log_writer = LogWriter('visualdl_log_dir')
lr_scheduler = LinearDecayWithWarmup(learning_rate, num_training_steps, warmup)

# LayerNorm (and bias) parameters take no part in weight decay: only parameters
# whose name contains neither "bias" nor "norm" are collected here.
no_decay_markers = ("bias", "norm")
decay_params = [
    param.name
    for param_name, param in model.named_parameters()
    if all(marker not in param_name for marker in no_decay_markers)
]

# AdamW optimizer
optimizer = paddle.optimizer.AdamW(
    learning_rate=lr_scheduler,
    beta1=0.9,
    beta2=0.999,
    epsilon=adam_epsilon,
    parameters=model.parameters(),
    weight_decay=weight_decay,
    apply_decay_param_fun=lambda name: name in decay_params)

In [46]:
# Show the total number of training steps computed in the configuration cell.
print(num_training_steps)

156300


In [47]:
# Compute the evaluation metrics Rouge-1, Rouge-2 and Rouge-L
def compute_metrics(preds, targets):
    """Average the ROUGE-1, ROUGE-2 and ROUGE-L F-scores over all pairs.

    Every prediction and reference is passed through ``' '.join(...)`` before
    scoring (for string inputs this puts a space between consecutive
    characters).  A pair whose scoring raises ``ValueError`` contributes
    ``[0, 0, 0]``.  The three averages are also printed as percentages rounded
    to two decimals.

    Args:
        preds: Sequence of predicted summaries.
        targets: Sequence of reference summaries, same length as ``preds``.

    Returns:
        tuple: ``(rouge1, rouge2, rougel)`` mean F-scores (not multiplied by 100).

    Raises:
        AssertionError: If ``preds`` and ``targets`` differ in length.
    """
    assert len(preds) == len(targets), (
        'The length of pred_responses should be equal to the length of '
        'target_responses. But received {} and {}.'.format(
            len(preds), len(targets)))
    scorer = Rouge()
    metric_keys = ('rouge-1', 'rouge-2', 'rouge-l')

    per_pair = []
    for hypothesis, reference in zip(preds, targets):
        try:
            result = scorer.get_scores(' '.join(hypothesis),
                                       ' '.join(reference))[0]
            per_pair.append([result[key]['f'] for key in metric_keys])
        except ValueError:
            # Scoring failed for this pair: count it as zero on all metrics.
            per_pair.append([0, 0, 0])

    # Column-wise means, one per metric, in the order of `metric_keys`.
    rouge1, rouge2, rougel = (
        np.mean([row[col] for row in per_pair]) for col in range(3))

    print('\n' + '*' * 15)
    print('The auto evaluation result is:')
    for label, value in (('rouge-1:', rouge1),
                         ('rouge-2:', rouge2),
                         ('rouge-L:', rougel)):
        print(label, round(value*100, 2))

    return rouge1, rouge2, rougel

In [16]:
# Model evaluation function
@paddle.no_grad()
def evaluate(model, data_loader, tokenizer, min_target_length,
             max_target_length):
    """Generate summaries for every batch and score them with ROUGE.

    The model is put into eval mode, unwrapped if it is a
    ``paddle.DataParallel`` instance, and used to generate with greedy search.
    Generated ids and label ids are decoded to text and handed to
    ``compute_metrics``.  The (unwrapped) model is put back into train mode
    before returning.

    Args:
        model: Model exposing ``generate``; may be wrapped in DataParallel.
        data_loader: Iterable of batches with ``input_ids``,
            ``attention_mask`` and ``labels`` entries.
        tokenizer: Tokenizer providing ``batch_decode`` and ``pad_token_id``.
        min_target_length: ``min_length`` passed to ``generate``.
        max_target_length: ``max_length`` passed to ``generate``.

    Returns:
        tuple: ``(rouge1, rouge2, rougel)`` as returned by ``compute_metrics``.
    """

    def decode(token_ids):
        # Turn a batch of id sequences back into strings.
        return tokenizer.batch_decode(token_ids,
                                      skip_special_tokens=True,
                                      clean_up_tokenization_spaces=False)

    model.eval()
    if isinstance(model, paddle.DataParallel):
        model = model._layers

    predictions, references = [], []
    for batch in tqdm(data_loader, total=len(data_loader), desc="Eval step"):
        label_ids = batch.pop('labels').numpy()
        # Generation; only the first element of the result is used as the ids.
        generated = model.generate(input_ids=batch['input_ids'],
                                   attention_mask=batch['attention_mask'],
                                   min_length=min_target_length,
                                   max_length=max_target_length,
                                   decode_strategy='greedy_search',
                                   use_cache=True)
        predictions.extend(decode(generated[0].numpy()))
        # Label positions equal to -100 are swapped for the pad id before
        # decoding (presumably -100 is the collator's label padding value).
        label_ids = np.where(label_ids != -100, label_ids,
                             tokenizer.pad_token_id)
        references.extend(decode(label_ids))

    rouge1, rouge2, rougel = compute_metrics(predictions, references)
    model.train()
    return rouge1, rouge2, rougel

## 定义训练

In [52]:
def train(model, train_data_loader, eval_data_loader=None, save_steps=20000,
          extra_save_steps=(1000, 140000, 150000)):
    """Fine-tune ``model`` on ``train_data_loader`` and checkpoint periodically.

    Besides its arguments, the function reads these notebook-level names:
    ``num_epochs``, ``num_training_steps``, ``log_steps``, ``optimizer``,
    ``lr_scheduler``, ``log_writer``, ``logger``, ``tokenizer`` and
    ``output_dir``; when evaluating it also uses ``evaluate``,
    ``min_target_length`` and ``max_target_length``.

    A checkpoint step is any global step that is a multiple of ``save_steps``,
    equals ``num_training_steps``, or is listed in ``extra_save_steps``.
    With the default arguments the behaviour matches the previous version of
    this function: no evaluation, and every checkpoint step overwrites the
    model and tokenizer stored in ``output_dir``.

    Args:
        model: Model called as ``model(**batch)``; the call must return three
            values, the last of which is the loss.
        train_data_loader: Iterable of training batches (keyword dicts).
        eval_data_loader: Optional loader of evaluation batches.  When given,
            the model is evaluated at every checkpoint step, the ROUGE scores
            are logged, and the checkpoint is written only if ROUGE-L beats
            the best value seen so far.
        save_steps: Interval, in global steps, between checkpoint steps.
        extra_save_steps: Additional global steps treated as checkpoint steps.
    """
    global_step = 0
    best_rougel = 0
    tic_train = time.time()
    for epoch in range(num_epochs):
        for step, batch in enumerate(train_data_loader):
            global_step += 1
            # Forward pass; only the loss (third return value) is needed.
            _, _, loss = model(**batch)
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.clear_grad()

            if global_step % log_steps == 0:
                logger.info(
                    "global step %d/%d, epoch: %d, batch: %d, rank_id: %s, loss: %f, lr: %.10f, speed: %.4f step/s"
                    % (global_step, num_training_steps, epoch, step,
                       paddle.distributed.get_rank(), loss, optimizer.get_lr(),
                       log_steps / (time.time() - tic_train)))
                log_writer.add_scalar("train_loss", loss.numpy(), global_step)
                tic_train = time.time()

            at_checkpoint = (global_step % save_steps == 0
                             or global_step == num_training_steps
                             or global_step in extra_save_steps)
            if not at_checkpoint:
                continue

            if eval_data_loader is not None:
                tic_eval = time.time()
                rouge1, rouge2, rougel = evaluate(model, eval_data_loader,
                                                  tokenizer, min_target_length,
                                                  max_target_length)
                logger.info("eval done total : %s s" % (time.time() - tic_eval))
                log_writer.add_scalar("eval_rouge1", rouge1, global_step)
                log_writer.add_scalar("eval_rouge2", rouge2, global_step)
                log_writer.add_scalar("eval_rougel", rougel, global_step)
                if rougel <= best_rougel:
                    # No improvement: keep the checkpoint already on disk.
                    continue
                best_rougel = rougel

            # Only rank 0 writes the checkpoint.
            if paddle.distributed.get_rank() == 0:
                os.makedirs(output_dir, exist_ok=True)
                # Need better way to get inner model of DataParallel
                model_to_save = model._layers if isinstance(
                    model, paddle.DataParallel) else model
                model_to_save.save_pretrained(output_dir)
                tokenizer.save_pretrained(output_dir)

In [53]:
# Invocation
# Train the model
train(model, train_data_loader)

[32m[2023-06-05 22:28:40,360] [    INFO][0m - global step 100/156300, epoch: 0, batch: 99, rank_id: 0, loss: 3.599521, lr: 0.0000018234, speed: 0.9260 step/s[0m
[32m[2023-06-05 22:30:33,192] [    INFO][0m - global step 200/156300, epoch: 0, batch: 199, rank_id: 0, loss: 2.158661, lr: 0.0000034229, speed: 0.8864 step/s[0m
[32m[2023-06-05 22:32:26,325] [    INFO][0m - global step 300/156300, epoch: 0, batch: 299, rank_id: 0, loss: 2.505717, lr: 0.0000050224, speed: 0.8840 step/s[0m
[32m[2023-06-05 22:34:18,029] [    INFO][0m - global step 400/156300, epoch: 0, batch: 399, rank_id: 0, loss: 2.098660, lr: 0.0000066219, speed: 0.8954 step/s[0m
[32m[2023-06-05 22:36:04,628] [    INFO][0m - global step 500/156300, epoch: 0, batch: 499, rank_id: 0, loss: 2.317709, lr: 0.0000082214, speed: 0.9382 step/s[0m
[32m[2023-06-05 22:37:55,498] [    INFO][0m - global step 600/156300, epoch: 0, batch: 599, rank_id: 0, loss: 1.524417, lr: 0.0000098209, speed: 0.9021 step/s[0m
[32m[2023-

[32m[2023-06-05 23:57:17,868] [    INFO][0m - global step 4900/156300, epoch: 1, batch: 1773, rank_id: 0, loss: 1.378705, lr: 0.0000494164, speed: 0.8926 step/s[0m
[32m[2023-06-05 23:59:07,226] [    INFO][0m - global step 5000/156300, epoch: 1, batch: 1873, rank_id: 0, loss: 1.137408, lr: 0.0000493837, speed: 0.9146 step/s[0m
[32m[2023-06-06 00:00:59,741] [    INFO][0m - global step 5100/156300, epoch: 1, batch: 1973, rank_id: 0, loss: 0.984272, lr: 0.0000493511, speed: 0.8889 step/s[0m
[32m[2023-06-06 00:02:48,769] [    INFO][0m - global step 5200/156300, epoch: 1, batch: 2073, rank_id: 0, loss: 1.023719, lr: 0.0000493184, speed: 0.9173 step/s[0m
[32m[2023-06-06 00:04:34,926] [    INFO][0m - global step 5300/156300, epoch: 1, batch: 2173, rank_id: 0, loss: 0.841073, lr: 0.0000492858, speed: 0.9421 step/s[0m
[32m[2023-06-06 00:06:26,500] [    INFO][0m - global step 5400/156300, epoch: 1, batch: 2273, rank_id: 0, loss: 0.502050, lr: 0.0000492531, speed: 0.8964 step/s[0

[32m[2023-06-06 01:29:35,892] [    INFO][0m - global step 9900/156300, epoch: 3, batch: 521, rank_id: 0, loss: 0.697300, lr: 0.0000477842, speed: 0.9313 step/s[0m
[32m[2023-06-06 01:31:28,004] [    INFO][0m - global step 10000/156300, epoch: 3, batch: 621, rank_id: 0, loss: 0.603186, lr: 0.0000477516, speed: 0.8921 step/s[0m
[32m[2023-06-06 01:33:17,832] [    INFO][0m - global step 10100/156300, epoch: 3, batch: 721, rank_id: 0, loss: 0.652068, lr: 0.0000477189, speed: 0.9106 step/s[0m
[32m[2023-06-06 01:35:08,032] [    INFO][0m - global step 10200/156300, epoch: 3, batch: 821, rank_id: 0, loss: 0.954892, lr: 0.0000476863, speed: 0.9076 step/s[0m
[32m[2023-06-06 01:37:00,951] [    INFO][0m - global step 10300/156300, epoch: 3, batch: 921, rank_id: 0, loss: 0.937043, lr: 0.0000476536, speed: 0.8857 step/s[0m
[32m[2023-06-06 01:38:51,690] [    INFO][0m - global step 10400/156300, epoch: 3, batch: 1021, rank_id: 0, loss: 0.705247, lr: 0.0000476210, speed: 0.9032 step/s[0

[32m[2023-06-06 03:00:00,514] [    INFO][0m - global step 14800/156300, epoch: 4, batch: 2295, rank_id: 0, loss: 0.744137, lr: 0.0000461847, speed: 0.9181 step/s[0m
[32m[2023-06-06 03:01:51,690] [    INFO][0m - global step 14900/156300, epoch: 4, batch: 2395, rank_id: 0, loss: 0.836293, lr: 0.0000461521, speed: 0.8996 step/s[0m
[32m[2023-06-06 03:03:44,465] [    INFO][0m - global step 15000/156300, epoch: 4, batch: 2495, rank_id: 0, loss: 0.738998, lr: 0.0000461194, speed: 0.8868 step/s[0m
[32m[2023-06-06 03:05:40,502] [    INFO][0m - global step 15100/156300, epoch: 4, batch: 2595, rank_id: 0, loss: 0.621049, lr: 0.0000460868, speed: 0.8619 step/s[0m
[32m[2023-06-06 03:07:33,484] [    INFO][0m - global step 15200/156300, epoch: 4, batch: 2695, rank_id: 0, loss: 0.637083, lr: 0.0000460542, speed: 0.8852 step/s[0m
[32m[2023-06-06 03:09:23,591] [    INFO][0m - global step 15300/156300, epoch: 4, batch: 2795, rank_id: 0, loss: 0.609382, lr: 0.0000460215, speed: 0.9083 ste

[32m[2023-06-06 04:30:39,141] [    INFO][0m - global step 19700/156300, epoch: 6, batch: 943, rank_id: 0, loss: 0.382402, lr: 0.0000445852, speed: 0.9047 step/s[0m
[32m[2023-06-06 04:32:30,394] [    INFO][0m - global step 19800/156300, epoch: 6, batch: 1043, rank_id: 0, loss: 0.619550, lr: 0.0000445526, speed: 0.8990 step/s[0m
[32m[2023-06-06 04:34:21,416] [    INFO][0m - global step 19900/156300, epoch: 6, batch: 1143, rank_id: 0, loss: 0.449046, lr: 0.0000445200, speed: 0.9008 step/s[0m
[32m[2023-06-06 04:36:15,945] [    INFO][0m - global step 20000/156300, epoch: 6, batch: 1243, rank_id: 0, loss: 0.468883, lr: 0.0000444873, speed: 0.8733 step/s[0m
[32m[2023-06-06 04:36:26,892] [    INFO][0m - tokenizer config file saved in checkpoints/tokenizer_config.json[0m
[32m[2023-06-06 04:36:26,894] [    INFO][0m - Special tokens file saved in checkpoints/special_tokens_map.json[0m
[32m[2023-06-06 04:38:18,608] [    INFO][0m - global step 20100/156300, epoch: 6, batch: 1343

[32m[2023-06-06 05:59:29,767] [    INFO][0m - global step 24500/156300, epoch: 7, batch: 2617, rank_id: 0, loss: 0.206880, lr: 0.0000430184, speed: 0.8529 step/s[0m
[32m[2023-06-06 06:01:19,507] [    INFO][0m - global step 24600/156300, epoch: 7, batch: 2717, rank_id: 0, loss: 0.165680, lr: 0.0000429858, speed: 0.9114 step/s[0m
[32m[2023-06-06 06:03:12,539] [    INFO][0m - global step 24700/156300, epoch: 7, batch: 2817, rank_id: 0, loss: 0.403042, lr: 0.0000429531, speed: 0.8848 step/s[0m
[32m[2023-06-06 06:05:01,895] [    INFO][0m - global step 24800/156300, epoch: 7, batch: 2917, rank_id: 0, loss: 0.607273, lr: 0.0000429205, speed: 0.9146 step/s[0m
[32m[2023-06-06 06:06:53,113] [    INFO][0m - global step 24900/156300, epoch: 7, batch: 3017, rank_id: 0, loss: 0.249556, lr: 0.0000428878, speed: 0.8993 step/s[0m
[32m[2023-06-06 06:08:41,567] [    INFO][0m - global step 25000/156300, epoch: 7, batch: 3117, rank_id: 0, loss: 0.255432, lr: 0.0000428552, speed: 0.9222 ste

[32m[2023-06-06 07:29:59,535] [    INFO][0m - global step 29400/156300, epoch: 9, batch: 1265, rank_id: 0, loss: 0.329019, lr: 0.0000414189, speed: 0.8825 step/s[0m
[32m[2023-06-06 07:31:49,022] [    INFO][0m - global step 29500/156300, epoch: 9, batch: 1365, rank_id: 0, loss: 0.195588, lr: 0.0000413863, speed: 0.9135 step/s[0m
[32m[2023-06-06 07:33:38,710] [    INFO][0m - global step 29600/156300, epoch: 9, batch: 1465, rank_id: 0, loss: 0.258116, lr: 0.0000413536, speed: 0.9118 step/s[0m
[32m[2023-06-06 07:35:27,065] [    INFO][0m - global step 29700/156300, epoch: 9, batch: 1565, rank_id: 0, loss: 0.306639, lr: 0.0000413210, speed: 0.9230 step/s[0m
[32m[2023-06-06 07:37:16,707] [    INFO][0m - global step 29800/156300, epoch: 9, batch: 1665, rank_id: 0, loss: 0.120928, lr: 0.0000412883, speed: 0.9122 step/s[0m
[32m[2023-06-06 07:39:09,402] [    INFO][0m - global step 29900/156300, epoch: 9, batch: 1765, rank_id: 0, loss: 0.235671, lr: 0.0000412557, speed: 0.8875 ste

[32m[2023-06-06 09:00:22,386] [    INFO][0m - global step 34300/156300, epoch: 10, batch: 3039, rank_id: 0, loss: 0.242871, lr: 0.0000398194, speed: 0.9122 step/s[0m
[32m[2023-06-06 09:02:12,727] [    INFO][0m - global step 34400/156300, epoch: 11, batch: 13, rank_id: 0, loss: 0.333795, lr: 0.0000397868, speed: 0.9064 step/s[0m
[32m[2023-06-06 09:04:01,901] [    INFO][0m - global step 34500/156300, epoch: 11, batch: 113, rank_id: 0, loss: 0.149994, lr: 0.0000397541, speed: 0.9161 step/s[0m
[32m[2023-06-06 09:05:53,476] [    INFO][0m - global step 34600/156300, epoch: 11, batch: 213, rank_id: 0, loss: 0.125461, lr: 0.0000397215, speed: 0.8964 step/s[0m
[32m[2023-06-06 09:07:48,467] [    INFO][0m - global step 34700/156300, epoch: 11, batch: 313, rank_id: 0, loss: 0.213298, lr: 0.0000396889, speed: 0.8697 step/s[0m
[32m[2023-06-06 09:09:37,687] [    INFO][0m - global step 34800/156300, epoch: 11, batch: 413, rank_id: 0, loss: 0.206577, lr: 0.0000396562, speed: 0.9157 ste

[32m[2023-06-06 10:30:45,625] [    INFO][0m - global step 39200/156300, epoch: 12, batch: 1687, rank_id: 0, loss: 0.083772, lr: 0.0000382199, speed: 0.9167 step/s[0m
[32m[2023-06-06 10:32:39,696] [    INFO][0m - global step 39300/156300, epoch: 12, batch: 1787, rank_id: 0, loss: 0.110543, lr: 0.0000381873, speed: 0.8768 step/s[0m
[32m[2023-06-06 10:34:31,085] [    INFO][0m - global step 39400/156300, epoch: 12, batch: 1887, rank_id: 0, loss: 0.071897, lr: 0.0000381546, speed: 0.8979 step/s[0m
[32m[2023-06-06 10:36:20,479] [    INFO][0m - global step 39500/156300, epoch: 12, batch: 1987, rank_id: 0, loss: 0.281830, lr: 0.0000381220, speed: 0.9143 step/s[0m
[32m[2023-06-06 10:38:09,471] [    INFO][0m - global step 39600/156300, epoch: 12, batch: 2087, rank_id: 0, loss: 0.188748, lr: 0.0000380894, speed: 0.9176 step/s[0m
[32m[2023-06-06 10:39:57,667] [    INFO][0m - global step 39700/156300, epoch: 12, batch: 2187, rank_id: 0, loss: 0.174213, lr: 0.0000380567, speed: 0.92

[32m[2023-06-06 11:59:33,328] [    INFO][0m - global step 44000/156300, epoch: 14, batch: 235, rank_id: 0, loss: 0.092806, lr: 0.0000366531, speed: 0.8848 step/s[0m
[32m[2023-06-06 12:01:27,895] [    INFO][0m - global step 44100/156300, epoch: 14, batch: 335, rank_id: 0, loss: 0.100913, lr: 0.0000366204, speed: 0.8730 step/s[0m
[32m[2023-06-06 12:03:16,978] [    INFO][0m - global step 44200/156300, epoch: 14, batch: 435, rank_id: 0, loss: 0.080963, lr: 0.0000365878, speed: 0.9169 step/s[0m
[32m[2023-06-06 12:05:05,489] [    INFO][0m - global step 44300/156300, epoch: 14, batch: 535, rank_id: 0, loss: 0.078679, lr: 0.0000365552, speed: 0.9217 step/s[0m
[32m[2023-06-06 12:06:56,174] [    INFO][0m - global step 44400/156300, epoch: 14, batch: 635, rank_id: 0, loss: 0.243676, lr: 0.0000365225, speed: 0.9036 step/s[0m
[32m[2023-06-06 12:08:46,988] [    INFO][0m - global step 44500/156300, epoch: 14, batch: 735, rank_id: 0, loss: 0.195702, lr: 0.0000364899, speed: 0.9025 ste

[32m[2023-06-06 13:29:59,637] [    INFO][0m - global step 48900/156300, epoch: 15, batch: 2009, rank_id: 0, loss: 0.141307, lr: 0.0000350536, speed: 0.9418 step/s[0m
[32m[2023-06-06 13:31:50,040] [    INFO][0m - global step 49000/156300, epoch: 15, batch: 2109, rank_id: 0, loss: 0.035481, lr: 0.0000350210, speed: 0.9059 step/s[0m
[32m[2023-06-06 13:33:38,465] [    INFO][0m - global step 49100/156300, epoch: 15, batch: 2209, rank_id: 0, loss: 0.047824, lr: 0.0000349883, speed: 0.9224 step/s[0m
[32m[2023-06-06 13:35:26,793] [    INFO][0m - global step 49200/156300, epoch: 15, batch: 2309, rank_id: 0, loss: 0.016877, lr: 0.0000349557, speed: 0.9233 step/s[0m
[32m[2023-06-06 13:37:17,923] [    INFO][0m - global step 49300/156300, epoch: 15, batch: 2409, rank_id: 0, loss: 0.122218, lr: 0.0000349230, speed: 0.9000 step/s[0m
[32m[2023-06-06 13:39:10,530] [    INFO][0m - global step 49400/156300, epoch: 15, batch: 2509, rank_id: 0, loss: 0.073347, lr: 0.0000348904, speed: 0.88

[32m[2023-06-06 15:00:31,331] [    INFO][0m - global step 53800/156300, epoch: 17, batch: 657, rank_id: 0, loss: 0.063194, lr: 0.0000334541, speed: 0.9123 step/s[0m
[32m[2023-06-06 15:02:22,180] [    INFO][0m - global step 53900/156300, epoch: 17, batch: 757, rank_id: 0, loss: 0.081131, lr: 0.0000334215, speed: 0.9023 step/s[0m
[32m[2023-06-06 15:04:14,322] [    INFO][0m - global step 54000/156300, epoch: 17, batch: 857, rank_id: 0, loss: 0.079001, lr: 0.0000333888, speed: 0.8918 step/s[0m
[32m[2023-06-06 15:06:05,682] [    INFO][0m - global step 54100/156300, epoch: 17, batch: 957, rank_id: 0, loss: 0.090772, lr: 0.0000333562, speed: 0.8981 step/s[0m
[32m[2023-06-06 15:07:56,844] [    INFO][0m - global step 54200/156300, epoch: 17, batch: 1057, rank_id: 0, loss: 0.049205, lr: 0.0000333235, speed: 0.8997 step/s[0m
[32m[2023-06-06 15:09:48,523] [    INFO][0m - global step 54300/156300, epoch: 17, batch: 1157, rank_id: 0, loss: 0.075157, lr: 0.0000332909, speed: 0.8955 s

[32m[2023-06-06 16:30:55,002] [    INFO][0m - global step 58700/156300, epoch: 18, batch: 2431, rank_id: 0, loss: 0.041807, lr: 0.0000318546, speed: 0.9089 step/s[0m
[32m[2023-06-06 16:32:50,671] [    INFO][0m - global step 58800/156300, epoch: 18, batch: 2531, rank_id: 0, loss: 0.040672, lr: 0.0000318220, speed: 0.8647 step/s[0m
[32m[2023-06-06 16:34:47,380] [    INFO][0m - global step 58900/156300, epoch: 18, batch: 2631, rank_id: 0, loss: 0.079626, lr: 0.0000317893, speed: 0.8569 step/s[0m
[32m[2023-06-06 16:36:35,450] [    INFO][0m - global step 59000/156300, epoch: 18, batch: 2731, rank_id: 0, loss: 0.044480, lr: 0.0000317567, speed: 0.9255 step/s[0m
[32m[2023-06-06 16:38:28,240] [    INFO][0m - global step 59100/156300, epoch: 18, batch: 2831, rank_id: 0, loss: 0.036665, lr: 0.0000317241, speed: 0.8867 step/s[0m
[32m[2023-06-06 16:40:17,281] [    INFO][0m - global step 59200/156300, epoch: 18, batch: 2931, rank_id: 0, loss: 0.034881, lr: 0.0000316914, speed: 0.91

[32m[2023-06-06 17:59:50,327] [    INFO][0m - global step 63500/156300, epoch: 20, batch: 979, rank_id: 0, loss: 0.029052, lr: 0.0000302878, speed: 0.9239 step/s[0m
[32m[2023-06-06 18:01:41,221] [    INFO][0m - global step 63600/156300, epoch: 20, batch: 1079, rank_id: 0, loss: 0.047503, lr: 0.0000302551, speed: 0.9019 step/s[0m
[32m[2023-06-06 18:03:34,927] [    INFO][0m - global step 63700/156300, epoch: 20, batch: 1179, rank_id: 0, loss: 0.039161, lr: 0.0000302225, speed: 0.8796 step/s[0m
[32m[2023-06-06 18:05:29,235] [    INFO][0m - global step 63800/156300, epoch: 20, batch: 1279, rank_id: 0, loss: 0.041827, lr: 0.0000301898, speed: 0.8749 step/s[0m
[32m[2023-06-06 18:07:16,965] [    INFO][0m - global step 63900/156300, epoch: 20, batch: 1379, rank_id: 0, loss: 0.062847, lr: 0.0000301572, speed: 0.9284 step/s[0m
[32m[2023-06-06 18:09:06,778] [    INFO][0m - global step 64000/156300, epoch: 20, batch: 1479, rank_id: 0, loss: 0.066469, lr: 0.0000301246, speed: 0.910

[32m[2023-06-06 19:30:19,941] [    INFO][0m - global step 68400/156300, epoch: 21, batch: 2753, rank_id: 0, loss: 0.015345, lr: 0.0000286883, speed: 0.9360 step/s[0m
[32m[2023-06-06 19:32:12,533] [    INFO][0m - global step 68500/156300, epoch: 21, batch: 2853, rank_id: 0, loss: 0.038895, lr: 0.0000286556, speed: 0.8883 step/s[0m
[32m[2023-06-06 19:34:02,363] [    INFO][0m - global step 68600/156300, epoch: 21, batch: 2953, rank_id: 0, loss: 0.046174, lr: 0.0000286230, speed: 0.9107 step/s[0m
[32m[2023-06-06 19:35:51,557] [    INFO][0m - global step 68700/156300, epoch: 21, batch: 3053, rank_id: 0, loss: 0.032151, lr: 0.0000285904, speed: 0.9160 step/s[0m
[32m[2023-06-06 19:37:41,682] [    INFO][0m - global step 68800/156300, epoch: 22, batch: 27, rank_id: 0, loss: 0.064449, lr: 0.0000285577, speed: 0.9082 step/s[0m
[32m[2023-06-06 19:39:31,286] [    INFO][0m - global step 68900/156300, epoch: 22, batch: 127, rank_id: 0, loss: 0.023632, lr: 0.0000285251, speed: 0.9125 

[32m[2023-06-06 21:00:47,535] [    INFO][0m - global step 73300/156300, epoch: 23, batch: 1401, rank_id: 0, loss: 0.023214, lr: 0.0000270888, speed: 0.9449 step/s[0m
[32m[2023-06-06 21:02:37,990] [    INFO][0m - global step 73400/156300, epoch: 23, batch: 1501, rank_id: 0, loss: 0.038140, lr: 0.0000270562, speed: 0.9055 step/s[0m
[32m[2023-06-06 21:04:25,766] [    INFO][0m - global step 73500/156300, epoch: 23, batch: 1601, rank_id: 0, loss: 0.012752, lr: 0.0000270235, speed: 0.9280 step/s[0m
[32m[2023-06-06 21:06:15,854] [    INFO][0m - global step 73600/156300, epoch: 23, batch: 1701, rank_id: 0, loss: 0.048165, lr: 0.0000269909, speed: 0.9085 step/s[0m
[32m[2023-06-06 21:08:09,938] [    INFO][0m - global step 73700/156300, epoch: 23, batch: 1801, rank_id: 0, loss: 0.024627, lr: 0.0000269582, speed: 0.8767 step/s[0m
[32m[2023-06-06 21:10:02,697] [    INFO][0m - global step 73800/156300, epoch: 23, batch: 1901, rank_id: 0, loss: 0.018604, lr: 0.0000269256, speed: 0.88

[32m[2023-06-06 22:31:12,024] [    INFO][0m - global step 78200/156300, epoch: 25, batch: 49, rank_id: 0, loss: 0.045322, lr: 0.0000254893, speed: 0.9204 step/s[0m
[32m[2023-06-06 22:33:01,413] [    INFO][0m - global step 78300/156300, epoch: 25, batch: 149, rank_id: 0, loss: 0.036429, lr: 0.0000254567, speed: 0.9143 step/s[0m
[32m[2023-06-06 22:34:55,949] [    INFO][0m - global step 78400/156300, epoch: 25, batch: 249, rank_id: 0, loss: 0.038559, lr: 0.0000254240, speed: 0.8732 step/s[0m
[32m[2023-06-06 22:36:49,588] [    INFO][0m - global step 78500/156300, epoch: 25, batch: 349, rank_id: 0, loss: 0.037625, lr: 0.0000253914, speed: 0.8801 step/s[0m
[32m[2023-06-06 22:38:38,595] [    INFO][0m - global step 78600/156300, epoch: 25, batch: 449, rank_id: 0, loss: 0.022630, lr: 0.0000253587, speed: 0.9175 step/s[0m
[32m[2023-06-06 22:40:26,541] [    INFO][0m - global step 78700/156300, epoch: 25, batch: 549, rank_id: 0, loss: 0.046021, lr: 0.0000253261, speed: 0.9265 step

[32m[2023-06-07 00:00:05,036] [    INFO][0m - global step 83000/156300, epoch: 26, batch: 1723, rank_id: 0, loss: 0.034305, lr: 0.0000239225, speed: 0.8906 step/s[0m
[32m[2023-06-07 00:01:55,531] [    INFO][0m - global step 83100/156300, epoch: 26, batch: 1823, rank_id: 0, loss: 0.029194, lr: 0.0000238898, speed: 0.9052 step/s[0m
[32m[2023-06-07 00:03:46,071] [    INFO][0m - global step 83200/156300, epoch: 26, batch: 1923, rank_id: 0, loss: 0.010506, lr: 0.0000238572, speed: 0.9048 step/s[0m
[32m[2023-06-07 00:05:34,391] [    INFO][0m - global step 83300/156300, epoch: 26, batch: 2023, rank_id: 0, loss: 0.012967, lr: 0.0000238245, speed: 0.9233 step/s[0m
[32m[2023-06-07 00:07:24,400] [    INFO][0m - global step 83400/156300, epoch: 26, batch: 2123, rank_id: 0, loss: 0.017138, lr: 0.0000237919, speed: 0.9092 step/s[0m
[32m[2023-06-07 00:09:13,580] [    INFO][0m - global step 83500/156300, epoch: 26, batch: 2223, rank_id: 0, loss: 0.016297, lr: 0.0000237593, speed: 0.91

[32m[2023-06-07 01:30:37,829] [    INFO][0m - global step 87900/156300, epoch: 28, batch: 371, rank_id: 0, loss: 0.027701, lr: 0.0000223230, speed: 0.8828 step/s[0m
[32m[2023-06-07 01:32:26,608] [    INFO][0m - global step 88000/156300, epoch: 28, batch: 471, rank_id: 0, loss: 0.020204, lr: 0.0000222903, speed: 0.9194 step/s[0m
[32m[2023-06-07 01:34:15,419] [    INFO][0m - global step 88100/156300, epoch: 28, batch: 571, rank_id: 0, loss: 0.010418, lr: 0.0000222577, speed: 0.9192 step/s[0m
[32m[2023-06-07 01:36:04,718] [    INFO][0m - global step 88200/156300, epoch: 28, batch: 671, rank_id: 0, loss: 0.005346, lr: 0.0000222251, speed: 0.9151 step/s[0m
[32m[2023-06-07 01:37:56,785] [    INFO][0m - global step 88300/156300, epoch: 28, batch: 771, rank_id: 0, loss: 0.032871, lr: 0.0000221924, speed: 0.8925 step/s[0m
[32m[2023-06-07 01:39:48,776] [    INFO][0m - global step 88400/156300, epoch: 28, batch: 871, rank_id: 0, loss: 0.010985, lr: 0.0000221598, speed: 0.8931 ste

[32m[2023-06-07 03:00:58,728] [    INFO][0m - global step 92800/156300, epoch: 29, batch: 2145, rank_id: 0, loss: 0.010982, lr: 0.0000207235, speed: 0.9257 step/s[0m
[32m[2023-06-07 03:02:51,574] [    INFO][0m - global step 92900/156300, epoch: 29, batch: 2245, rank_id: 0, loss: 0.016776, lr: 0.0000206908, speed: 0.8863 step/s[0m
[32m[2023-06-07 03:04:39,601] [    INFO][0m - global step 93000/156300, epoch: 29, batch: 2345, rank_id: 0, loss: 0.027785, lr: 0.0000206582, speed: 0.9258 step/s[0m
[32m[2023-06-07 03:06:30,230] [    INFO][0m - global step 93100/156300, epoch: 29, batch: 2445, rank_id: 0, loss: 0.005213, lr: 0.0000206256, speed: 0.9041 step/s[0m
[32m[2023-06-07 03:08:26,447] [    INFO][0m - global step 93200/156300, epoch: 29, batch: 2545, rank_id: 0, loss: 0.006108, lr: 0.0000205929, speed: 0.8606 step/s[0m
[32m[2023-06-07 03:10:22,396] [    INFO][0m - global step 93300/156300, epoch: 29, batch: 2645, rank_id: 0, loss: 0.019217, lr: 0.0000205603, speed: 0.86

[32m[2023-06-07 04:31:32,991] [    INFO][0m - global step 97700/156300, epoch: 31, batch: 793, rank_id: 0, loss: 0.017108, lr: 0.0000191240, speed: 0.9140 step/s[0m
[32m[2023-06-07 04:33:24,863] [    INFO][0m - global step 97800/156300, epoch: 31, batch: 893, rank_id: 0, loss: 0.008381, lr: 0.0000190914, speed: 0.8940 step/s[0m
[32m[2023-06-07 04:35:15,120] [    INFO][0m - global step 97900/156300, epoch: 31, batch: 993, rank_id: 0, loss: 0.022333, lr: 0.0000190587, speed: 0.9071 step/s[0m
[32m[2023-06-07 04:37:06,790] [    INFO][0m - global step 98000/156300, epoch: 31, batch: 1093, rank_id: 0, loss: 0.033630, lr: 0.0000190261, speed: 0.8956 step/s[0m
[32m[2023-06-07 04:39:00,331] [    INFO][0m - global step 98100/156300, epoch: 31, batch: 1193, rank_id: 0, loss: 0.026512, lr: 0.0000189934, speed: 0.8809 step/s[0m
[32m[2023-06-07 04:40:54,390] [    INFO][0m - global step 98200/156300, epoch: 31, batch: 1293, rank_id: 0, loss: 0.022911, lr: 0.0000189608, speed: 0.8769 

[32m[2023-06-07 06:00:18,464] [    INFO][0m - global step 102500/156300, epoch: 32, batch: 2467, rank_id: 0, loss: 0.010819, lr: 0.0000175572, speed: 0.9046 step/s[0m
[32m[2023-06-07 06:02:12,107] [    INFO][0m - global step 102600/156300, epoch: 32, batch: 2567, rank_id: 0, loss: 0.005819, lr: 0.0000175245, speed: 0.8801 step/s[0m
[32m[2023-06-07 06:04:10,011] [    INFO][0m - global step 102700/156300, epoch: 32, batch: 2667, rank_id: 0, loss: 0.012177, lr: 0.0000174919, speed: 0.8483 step/s[0m
[32m[2023-06-07 06:05:57,297] [    INFO][0m - global step 102800/156300, epoch: 32, batch: 2767, rank_id: 0, loss: 0.003342, lr: 0.0000174592, speed: 0.9323 step/s[0m
[32m[2023-06-07 06:07:49,224] [    INFO][0m - global step 102900/156300, epoch: 32, batch: 2867, rank_id: 0, loss: 0.030192, lr: 0.0000174266, speed: 0.8936 step/s[0m
[32m[2023-06-07 06:09:39,946] [    INFO][0m - global step 103000/156300, epoch: 32, batch: 2967, rank_id: 0, loss: 0.007391, lr: 0.0000173939, speed

[32m[2023-06-07 07:30:57,476] [    INFO][0m - global step 107400/156300, epoch: 34, batch: 1115, rank_id: 0, loss: 0.009496, lr: 0.0000159577, speed: 0.9010 step/s[0m
[32m[2023-06-07 07:32:50,121] [    INFO][0m - global step 107500/156300, epoch: 34, batch: 1215, rank_id: 0, loss: 0.012169, lr: 0.0000159250, speed: 0.8879 step/s[0m
[32m[2023-06-07 07:34:44,677] [    INFO][0m - global step 107600/156300, epoch: 34, batch: 1315, rank_id: 0, loss: 0.014613, lr: 0.0000158924, speed: 0.8731 step/s[0m
[32m[2023-06-07 07:36:30,833] [    INFO][0m - global step 107700/156300, epoch: 34, batch: 1415, rank_id: 0, loss: 0.012973, lr: 0.0000158597, speed: 0.9421 step/s[0m
[32m[2023-06-07 07:38:22,442] [    INFO][0m - global step 107800/156300, epoch: 34, batch: 1515, rank_id: 0, loss: 0.022737, lr: 0.0000158271, speed: 0.8961 step/s[0m
[32m[2023-06-07 07:40:09,433] [    INFO][0m - global step 107900/156300, epoch: 34, batch: 1615, rank_id: 0, loss: 0.006715, lr: 0.0000157945, speed

[32m[2023-06-07 09:01:28,754] [    INFO][0m - global step 112300/156300, epoch: 35, batch: 2889, rank_id: 0, loss: 0.007022, lr: 0.0000143582, speed: 0.9039 step/s[0m
[32m[2023-06-07 09:03:20,580] [    INFO][0m - global step 112400/156300, epoch: 35, batch: 2989, rank_id: 0, loss: 0.008550, lr: 0.0000143255, speed: 0.8944 step/s[0m
[32m[2023-06-07 09:05:09,355] [    INFO][0m - global step 112500/156300, epoch: 35, batch: 3089, rank_id: 0, loss: 0.003078, lr: 0.0000142929, speed: 0.9195 step/s[0m
[32m[2023-06-07 09:06:58,118] [    INFO][0m - global step 112600/156300, epoch: 36, batch: 63, rank_id: 0, loss: 0.006590, lr: 0.0000142603, speed: 0.9196 step/s[0m
[32m[2023-06-07 09:08:50,317] [    INFO][0m - global step 112700/156300, epoch: 36, batch: 163, rank_id: 0, loss: 0.007702, lr: 0.0000142276, speed: 0.8914 step/s[0m
[32m[2023-06-07 09:10:42,738] [    INFO][0m - global step 112800/156300, epoch: 36, batch: 263, rank_id: 0, loss: 0.012748, lr: 0.0000141950, speed: 0.

[32m[2023-06-07 10:31:58,824] [    INFO][0m - global step 117200/156300, epoch: 37, batch: 1537, rank_id: 0, loss: 0.010151, lr: 0.0000127587, speed: 0.9076 step/s[0m
[32m[2023-06-07 10:33:49,121] [    INFO][0m - global step 117300/156300, epoch: 37, batch: 1637, rank_id: 0, loss: 0.003976, lr: 0.0000127261, speed: 0.9068 step/s[0m
[32m[2023-06-07 10:35:40,895] [    INFO][0m - global step 117400/156300, epoch: 37, batch: 1737, rank_id: 0, loss: 0.009079, lr: 0.0000126934, speed: 0.8948 step/s[0m
[32m[2023-06-07 10:37:30,455] [    INFO][0m - global step 117500/156300, epoch: 37, batch: 1837, rank_id: 0, loss: 0.007959, lr: 0.0000126608, speed: 0.9129 step/s[0m
[32m[2023-06-07 10:39:21,423] [    INFO][0m - global step 117600/156300, epoch: 37, batch: 1937, rank_id: 0, loss: 0.010683, lr: 0.0000126281, speed: 0.9013 step/s[0m
[32m[2023-06-07 10:41:08,849] [    INFO][0m - global step 117700/156300, epoch: 37, batch: 2037, rank_id: 0, loss: 0.004533, lr: 0.0000125955, speed

[32m[2023-06-07 11:58:58,843] [    INFO][0m - global step 121900/156300, epoch: 38, batch: 3111, rank_id: 0, loss: 0.014688, lr: 0.0000112245, speed: 0.9218 step/s[0m
[32m[2023-06-07 12:00:49,183] [    INFO][0m - global step 122000/156300, epoch: 39, batch: 85, rank_id: 0, loss: 0.001539, lr: 0.0000111918, speed: 0.9064 step/s[0m
[32m[2023-06-07 12:02:40,097] [    INFO][0m - global step 122100/156300, epoch: 39, batch: 185, rank_id: 0, loss: 0.007541, lr: 0.0000111592, speed: 0.9017 step/s[0m
[32m[2023-06-07 12:04:34,896] [    INFO][0m - global step 122200/156300, epoch: 39, batch: 285, rank_id: 0, loss: 0.008714, lr: 0.0000111266, speed: 0.8712 step/s[0m
[32m[2023-06-07 12:06:26,221] [    INFO][0m - global step 122300/156300, epoch: 39, batch: 385, rank_id: 0, loss: 0.004104, lr: 0.0000110939, speed: 0.8984 step/s[0m
[32m[2023-06-07 12:08:13,648] [    INFO][0m - global step 122400/156300, epoch: 39, batch: 485, rank_id: 0, loss: 0.013214, lr: 0.0000110613, speed: 0.93

[32m[2023-06-07 13:29:30,195] [    INFO][0m - global step 126800/156300, epoch: 40, batch: 1759, rank_id: 0, loss: 0.006392, lr: 0.0000096250, speed: 0.8883 step/s[0m
[32m[2023-06-07 13:31:19,283] [    INFO][0m - global step 126900/156300, epoch: 40, batch: 1859, rank_id: 0, loss: 0.003965, lr: 0.0000095924, speed: 0.9168 step/s[0m
[32m[2023-06-07 13:33:11,002] [    INFO][0m - global step 127000/156300, epoch: 40, batch: 1959, rank_id: 0, loss: 0.008735, lr: 0.0000095597, speed: 0.8952 step/s[0m
[32m[2023-06-07 13:35:00,018] [    INFO][0m - global step 127100/156300, epoch: 40, batch: 2059, rank_id: 0, loss: 0.004989, lr: 0.0000095271, speed: 0.9174 step/s[0m
[32m[2023-06-07 13:36:47,400] [    INFO][0m - global step 127200/156300, epoch: 40, batch: 2159, rank_id: 0, loss: 0.006714, lr: 0.0000094944, speed: 0.9314 step/s[0m
[32m[2023-06-07 13:38:38,198] [    INFO][0m - global step 127300/156300, epoch: 40, batch: 2259, rank_id: 0, loss: 0.001795, lr: 0.0000094618, speed

[32m[2023-06-07 15:00:03,843] [    INFO][0m - global step 131700/156300, epoch: 42, batch: 407, rank_id: 0, loss: 0.009012, lr: 0.0000080255, speed: 0.8963 step/s[0m
[32m[2023-06-07 15:01:51,289] [    INFO][0m - global step 131800/156300, epoch: 42, batch: 507, rank_id: 0, loss: 0.003283, lr: 0.0000079929, speed: 0.9308 step/s[0m
[32m[2023-06-07 15:03:41,818] [    INFO][0m - global step 131900/156300, epoch: 42, batch: 607, rank_id: 0, loss: 0.003554, lr: 0.0000079602, speed: 0.9049 step/s[0m
[32m[2023-06-07 15:05:32,645] [    INFO][0m - global step 132000/156300, epoch: 42, batch: 707, rank_id: 0, loss: 0.006293, lr: 0.0000079276, speed: 0.9024 step/s[0m
[32m[2023-06-07 15:07:22,551] [    INFO][0m - global step 132100/156300, epoch: 42, batch: 807, rank_id: 0, loss: 0.008421, lr: 0.0000078949, speed: 0.9100 step/s[0m
[32m[2023-06-07 15:09:15,204] [    INFO][0m - global step 132200/156300, epoch: 42, batch: 907, rank_id: 0, loss: 0.005318, lr: 0.0000078623, speed: 0.88

[32m[2023-06-07 16:30:25,184] [    INFO][0m - global step 136600/156300, epoch: 43, batch: 2181, rank_id: 0, loss: 0.004327, lr: 0.0000064260, speed: 0.9290 step/s[0m
[32m[2023-06-07 16:32:16,148] [    INFO][0m - global step 136700/156300, epoch: 43, batch: 2281, rank_id: 0, loss: 0.005938, lr: 0.0000063934, speed: 0.9013 step/s[0m
[32m[2023-06-07 16:34:05,582] [    INFO][0m - global step 136800/156300, epoch: 43, batch: 2381, rank_id: 0, loss: 0.014332, lr: 0.0000063607, speed: 0.9139 step/s[0m
[32m[2023-06-07 16:35:56,243] [    INFO][0m - global step 136900/156300, epoch: 43, batch: 2481, rank_id: 0, loss: 0.002355, lr: 0.0000063281, speed: 0.9038 step/s[0m
[32m[2023-06-07 16:37:51,840] [    INFO][0m - global step 137000/156300, epoch: 43, batch: 2581, rank_id: 0, loss: 0.001638, lr: 0.0000062955, speed: 0.8652 step/s[0m
[32m[2023-06-07 16:39:47,851] [    INFO][0m - global step 137100/156300, epoch: 43, batch: 2681, rank_id: 0, loss: 0.005080, lr: 0.0000062628, speed

[32m[2023-06-07 17:57:25,409] [    INFO][0m - global step 141300/156300, epoch: 45, batch: 629, rank_id: 0, loss: 0.001587, lr: 0.0000048918, speed: 0.9050 step/s[0m
[32m[2023-06-07 17:59:15,968] [    INFO][0m - global step 141400/156300, epoch: 45, batch: 729, rank_id: 0, loss: 0.004543, lr: 0.0000048592, speed: 0.9046 step/s[0m
[32m[2023-06-07 18:01:05,335] [    INFO][0m - global step 141500/156300, epoch: 45, batch: 829, rank_id: 0, loss: 0.001415, lr: 0.0000048265, speed: 0.9145 step/s[0m
[32m[2023-06-07 18:02:58,254] [    INFO][0m - global step 141600/156300, epoch: 45, batch: 929, rank_id: 0, loss: 0.002481, lr: 0.0000047939, speed: 0.8857 step/s[0m
[32m[2023-06-07 18:04:48,997] [    INFO][0m - global step 141700/156300, epoch: 45, batch: 1029, rank_id: 0, loss: 0.005140, lr: 0.0000047613, speed: 0.9031 step/s[0m
[32m[2023-06-07 18:06:39,166] [    INFO][0m - global step 141800/156300, epoch: 45, batch: 1129, rank_id: 0, loss: 0.003225, lr: 0.0000047286, speed: 0.

[32m[2023-06-07 19:27:47,160] [    INFO][0m - global step 146200/156300, epoch: 46, batch: 2403, rank_id: 0, loss: 0.004024, lr: 0.0000032923, speed: 0.9022 step/s[0m
[32m[2023-06-07 19:29:40,313] [    INFO][0m - global step 146300/156300, epoch: 46, batch: 2503, rank_id: 0, loss: 0.001606, lr: 0.0000032597, speed: 0.8839 step/s[0m
[32m[2023-06-07 19:31:36,419] [    INFO][0m - global step 146400/156300, epoch: 46, batch: 2603, rank_id: 0, loss: 0.003898, lr: 0.0000032270, speed: 0.8614 step/s[0m
[32m[2023-06-07 19:33:28,158] [    INFO][0m - global step 146500/156300, epoch: 46, batch: 2703, rank_id: 0, loss: 0.004210, lr: 0.0000031944, speed: 0.8951 step/s[0m
[32m[2023-06-07 19:35:19,049] [    INFO][0m - global step 146600/156300, epoch: 46, batch: 2803, rank_id: 0, loss: 0.006353, lr: 0.0000031618, speed: 0.9019 step/s[0m
[32m[2023-06-07 19:37:10,088] [    INFO][0m - global step 146700/156300, epoch: 46, batch: 2903, rank_id: 0, loss: 0.002829, lr: 0.0000031291, speed

[32m[2023-06-07 20:54:54,465] [    INFO][0m - global step 150900/156300, epoch: 48, batch: 851, rank_id: 0, loss: 0.001876, lr: 0.0000017581, speed: 0.8914 step/s[0m
[32m[2023-06-07 20:56:44,618] [    INFO][0m - global step 151000/156300, epoch: 48, batch: 951, rank_id: 0, loss: 0.007644, lr: 0.0000017255, speed: 0.9080 step/s[0m
[32m[2023-06-07 20:58:36,792] [    INFO][0m - global step 151100/156300, epoch: 48, batch: 1051, rank_id: 0, loss: 0.005407, lr: 0.0000016928, speed: 0.8916 step/s[0m
[32m[2023-06-07 21:00:28,134] [    INFO][0m - global step 151200/156300, epoch: 48, batch: 1151, rank_id: 0, loss: 0.011273, lr: 0.0000016602, speed: 0.8983 step/s[0m
[32m[2023-06-07 21:02:22,348] [    INFO][0m - global step 151300/156300, epoch: 48, batch: 1251, rank_id: 0, loss: 0.001919, lr: 0.0000016276, speed: 0.8757 step/s[0m
[32m[2023-06-07 21:04:12,508] [    INFO][0m - global step 151400/156300, epoch: 48, batch: 1351, rank_id: 0, loss: 0.002378, lr: 0.0000015949, speed: 

[32m[2023-06-07 22:25:26,612] [    INFO][0m - global step 155800/156300, epoch: 49, batch: 2625, rank_id: 0, loss: 0.001987, lr: 0.0000001586, speed: 0.8503 step/s[0m
[32m[2023-06-07 22:27:15,174] [    INFO][0m - global step 155900/156300, epoch: 49, batch: 2725, rank_id: 0, loss: 0.001197, lr: 0.0000001260, speed: 0.9213 step/s[0m
[32m[2023-06-07 22:29:08,297] [    INFO][0m - global step 156000/156300, epoch: 49, batch: 2825, rank_id: 0, loss: 0.002396, lr: 0.0000000934, speed: 0.8841 step/s[0m
[32m[2023-06-07 22:30:56,250] [    INFO][0m - global step 156100/156300, epoch: 49, batch: 2925, rank_id: 0, loss: 0.001298, lr: 0.0000000607, speed: 0.9265 step/s[0m
[32m[2023-06-07 22:32:48,262] [    INFO][0m - global step 156200/156300, epoch: 49, batch: 3025, rank_id: 0, loss: 0.001472, lr: 0.0000000281, speed: 0.8929 step/s[0m
[32m[2023-06-07 22:34:37,264] [    INFO][0m - global step 156300/156300, epoch: 49, batch: 3125, rank_id: 0, loss: 0.000058, lr: 0.0000000000, speed

# 输出测试

In [54]:
# Load the model weights and the tokenizer from the local 'checkpoints' directory.
# NOTE(review): presumably this is the checkpoint written by the training run above — confirm.
model = PegasusForConditionalGeneration.from_pretrained('checkpoints')
# Put the model in evaluation mode before running inference.
model.eval()
tokenizer = PegasusChineseTokenizer.from_pretrained('checkpoints')

In [55]:
num_beams = 4


def infer(text, model, tokenizer):
    """Generate a summary string for ``text`` using beam search.

    Relies on the module-level names ``max_source_length``,
    ``min_target_length`` and ``num_beams``.

    Args:
        text: Source text handed to ``tokenizer``.
        model: Object exposing ``generate``; called with the encoded input ids.
        tokenizer: Callable tokenizer that also provides ``decode``.

    Returns:
        The decoded text of the first sequence returned by ``model.generate``,
        with special tokens skipped.
    """
    # Encode the input, truncating it to max_source_length.
    encoded = tokenizer(
        text,
        truncation=True,
        max_length=max_source_length,
        return_tensors='pd',
    )
    generation_kwargs = dict(
        max_length=160,
        min_length=min_target_length,
        decode_strategy='beam_search',
        num_beams=num_beams,
    )
    # generate() returns a pair; only the first element is used here.
    output_ids, _ = model.generate(input_ids=encoded['input_ids'], **generation_kwargs)
    summary = tokenizer.decode(
        output_ids[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return summary

In [56]:
# Load the '|'-separated test set; row 0 of the file is used as the header.
validation_file = './process_data_62/test_data.csv'
validation_data = pd.read_csv(
    validation_file,
    sep='|',
    header=0,
    encoding='utf-8',
)
# Keep the value at row 0, column position 1 as a sample input for a quick inference check.
text = validation_data.iat[0, 1]
validation_data.head(5)

Unnamed: 0,id,content
0,25001,客:个我刚收到们个话费通知的短信我看说我上个月消费二十一块多吧有五元是我的包月费剩下有16块...
1,25002,客:个先说个不太好意思今天不是我之前投诉不是个扣费的问题，个今天今天应该给我反馈回来有服务电...
2,25003,客:个我套餐到期没人通知我原来是138块6的现在它我这刚发现现在是按198坐:帮查来电手机号...
3,25004,客:我想问我个话费现在欠坐:来电这号码欠费钱是坐:行号码现在欠费2月份的165.75165....
4,25005,客:你帮我查我有业务是返返钱的业务一放1月份的给我返个九十九十九月好像还能返28你帮我看客:...


In [57]:
# Summary statistics of the per-row string length of the 'content' column.
(
    validation_data['content']
    .str.len()
    .describe()
)

count    7522.000000
mean      421.702340
std       120.174324
min        15.000000
25%       334.000000
50%       512.000000
75%       512.000000
max       513.000000
Name: content, dtype: float64

In [58]:
def read_file(filename, num=2):
    """Parse a '|'-separated text file into a list of row dictionaries.

    The first line of the file is treated as a header and skipped. Every
    remaining non-blank line is split on '|' and the leading fields are
    stripped of surrounding whitespace.

    Args:
        filename: Path of the UTF-8 encoded file to read.
        num: When 3, the third field is also read and stored under
            "abstract"; for any other value only "id" and "content" are read.

    Returns:
        A list of dicts with the keys "id" and "content" (plus "abstract"
        when ``num == 3``). An empty or header-only file yields ``[]``.

    Raises:
        IndexError: If a non-blank line has fewer fields than requested.
    """
    keys = ("id", "content", "abstract") if num == 3 else ("id", "content")
    lines = []
    with open(filename, 'r', encoding='utf-8') as f:
        # Skip the header; the default keeps an empty file from raising StopIteration.
        next(f, None)
        for line in f:
            # Blank lines (e.g. a trailing newline) carry no record.
            if not line.strip():
                continue
            # Split once per line instead of once per extracted field.
            fields = line.split("|")
            lines.append({key: fields[pos].strip() for pos, key in enumerate(keys)})
    return lines
# Quick check: run inference on the sample text and display the result.
infer(text=text, model=model, tokenizer=tokenizer)

'用户来电对01月产生的15.93元的流量费有疑义，我方经系统查询安上网日志向用户解释，用户不认可，我方按快速处理向用户核减费用，用户不认可要求核实原因，请处理，谢谢。'

In [59]:
# Parse the test file into a list of {"id", "content"} dicts (two-field mode).
validation_lines = read_file(validation_file, num=2)

In [60]:
# Display the first parsed record to verify the id/content fields.
validation_lines[0]

{'id': '25001',
 'content': '客:个我刚收到们个话费通知的短信我看说我上个月消费二十一块多吧有五元是我的包月费剩下有16块多的个的消费我没有进行过何的上网通话的我想查查个什费用坐:我看1月二十一块8上网流量费15块9毛3国内通话费0.15元到达电话打一分钟流量应该是超出一部分用15块9毛3的流量费客:对我肯定没有用流量我是退休在家我家里有WIFI我不出去玩一星之前我刚激活根本连买菜都不用我出去流量是怎出来的钱这肯定有积分问题坐:用五十五十九兆流量扣19块15块9毛7的费用客:59兆流量六块钱坐:电话对流量超出不认可给进行上报反馈后期会有专人给回电处理客:他最后给我回电怎处理，我要求这返返费我肯定没消费说电话电话有过，电话我没打过我家里人没用我电话打过每个人都有手续坐:现在无法给进行核实只能看到使用的超出是没有使用我给记录上报后期会有专人给客:你给我上报余额实际话费反正就这15块几吧我肯定没花点钱我肯定要交的我绝对没用过我就没出过门激活坐:好我给记录上报后期会有专人给回复保持电话畅通客:直接就给我发短信坐:24小时你回复电话坐:感谢来电稍后按两个一谢谢'}

In [61]:
# Generate a summary for every test record and write them to 'result.csv'.
# Rows are collected in a plain list and turned into a DataFrame once at the
# end: appending to a DataFrame inside the loop copies the whole frame on each
# iteration and relied on the private DataFrame._append method.
records = []
for idx, line in enumerate(validation_lines, start=1):
    validation_id = line['id']
    content = line['content']
    ret = infer(content, model, tokenizer)
    records.append({"id": validation_id, "ret": ret})
    # Report progress for the first six records and then every 100th one.
    if idx % 100 == 0 or idx <= 6:
        print("数据生成进行中，当前进行ID：", validation_id)
# Explicit columns keep the frame two columns wide even when there are no records,
# so the header passed to to_csv always matches.
result_data = pd.DataFrame(records, columns=['id', 'ret'])
result_data.to_csv('result.csv', index=False, encoding='utf-8', sep='|', header=['id', 'ret'])

数据生成进行中，当前进行ID： 25001
数据生成进行中，当前进行ID： 25002
数据生成进行中，当前进行ID： 25003
数据生成进行中，当前进行ID： 25004
数据生成进行中，当前进行ID： 25005
数据生成进行中，当前进行ID： 25006
数据生成进行中，当前进行ID： 25100
数据生成进行中，当前进行ID： 25200
数据生成进行中，当前进行ID： 25300
数据生成进行中，当前进行ID： 25400
数据生成进行中，当前进行ID： 25500
数据生成进行中，当前进行ID： 25600
数据生成进行中，当前进行ID： 25700
数据生成进行中，当前进行ID： 25800
数据生成进行中，当前进行ID： 25900
数据生成进行中，当前进行ID： 26000
数据生成进行中，当前进行ID： 26100
数据生成进行中，当前进行ID： 26200
数据生成进行中，当前进行ID： 26300
数据生成进行中，当前进行ID： 26400
数据生成进行中，当前进行ID： 26500
数据生成进行中，当前进行ID： 26600
数据生成进行中，当前进行ID： 26700
数据生成进行中，当前进行ID： 26800
数据生成进行中，当前进行ID： 26900
数据生成进行中，当前进行ID： 27000
数据生成进行中，当前进行ID： 27100
数据生成进行中，当前进行ID： 27200
数据生成进行中，当前进行ID： 27300
数据生成进行中，当前进行ID： 27400
数据生成进行中，当前进行ID： 27500
数据生成进行中，当前进行ID： 27600
数据生成进行中，当前进行ID： 27700
数据生成进行中，当前进行ID： 27800
数据生成进行中，当前进行ID： 27900
数据生成进行中，当前进行ID： 28000
数据生成进行中，当前进行ID： 28100
数据生成进行中，当前进行ID： 28200
数据生成进行中，当前进行ID： 28300
数据生成进行中，当前进行ID： 28400
数据生成进行中，当前进行ID： 28500
数据生成进行中，当前进行ID： 28600
数据生成进行中，当前进行ID： 28700
数据生成进行中，当前进行ID： 28800
数据生成进行中，当前进行ID： 28900
数据生成进行中，当前

In [62]:
# Archive the checkpoints/ directory into checkpoints64.tar (-c create, -v list files, -f archive name).
!tar -cvf checkpoints64.tar checkpoints/

checkpoints/
checkpoints/model_config.json
checkpoints/vocab.txt
checkpoints/model_state.pdparams
checkpoints/tokenizer_config.json
checkpoints/special_tokens_map.json


In [51]:
# Recursively delete the checkpoints/ directory without prompting.
# NOTE(review): this cell's execution count (51) is lower than that of the cells above it (54-62),
# so it was last run out of order; running it removes the directory that the model-loading and
# tar cells read from — confirm it is meant to run only after the archive has been created.
!rm -rf checkpoints/