In [2]:
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainer, Seq2SeqTrainingArguments

In [3]:
import json
import random
# 读取train.json文件
# train_data = []

# 逐行读取train.json文件
# with open('./LCSTS/train.json', 'r', encoding='utf-8') as f:
#     for line in f:
#         data = json.loads(line)
#         train_data.append(data)
        
# Read train.json (one JSON object per line) and draw a reproducible random subset
train_data = []
num_samples = 5000

with open('./LCSTS/train.json', 'r', encoding='utf-8') as f:
    for line in f:
        # Skip blank lines (e.g. a trailing newline) that json.loads would reject
        if not line.strip():
            continue
        data = json.loads(line)
        train_data.append(data)

# Sample with a dedicated seeded generator so reruns pick the same records without
# touching the global `random` state; cap the size so a file holding fewer than
# `num_samples` records does not raise ValueError.
train_data = random.Random(42).sample(train_data, min(num_samples, len(train_data)))


# # 读取dev.json文件
# with open('./LCSTS/dev.json', 'r', encoding='utf-8') as f:
#     dev_data = json.load(f)

# Reduce each raw record to the three fields used downstream: id, summary and content
def process_data(data):
    """Return a new list of dicts holding only ``id``, ``summary`` and ``content``.

    Args:
        data: iterable of mapping-like records that each provide the keys
            ``"id"``, ``"summary"`` and ``"content"``.

    Returns:
        list[dict]: one trimmed dict per input record, in the original order.

    Raises:
        KeyError: if a record lacks one of the three keys.
    """
    wanted_keys = ("id", "summary", "content")
    return [{key: record[key] for key in wanted_keys} for record in data]

# Keep only the fields needed for training
processed_train_data = process_data(train_data)

# # Process the dev data
# processed_dev_data = process_data(dev_data)

# Arrange the records column-wise for Dataset.from_dict; the "summary" field is
# exposed under the "title" column name.
train_dataset = Dataset.from_dict(
    {
        column: [record[field] for record in processed_train_data]
        for column, field in (("id", "id"), ("title", "summary"), ("content", "content"))
    }
)

# Hold out 100 examples as the test split, using a fixed seed
ds = train_dataset.train_test_split(test_size=100, seed=42)
ds

# train_size = int(0.7 * len(train_dataset))
# val_size = int(0.1 * len(train_dataset))
# test_size = len(train_dataset) - train_size - val_size

# ds = train_dataset.train_test_split(train_size=train_size, test_size=test_size, seed=42)
# ds

# 现在你可以像之前处理nlpcc_2017数据集一样使用train_dataset和test_dataset了

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'content'],
        num_rows: 4900
    })
    test: Dataset({
        features: ['id', 'title', 'content'],
        num_rows: 100
    })
})

In [6]:
# Show the first example of the training split
ds["train"][0]

{'id': 518719,
 'title': '三中全会临近吴敬琏警示危机：资产负债率太高',
 'content': '吴敬琏警示道，目前宏观与微观经济两方面正开始显现危机。吴敬琏指出，随着资产负债率的日益攀升，一些地区出现的危机很快传到其他地方，比如温州、苏南等地相继出现的“跑路”事件，包括6月的钱荒，其实都是给人们发预警信号。'}

In [7]:
# Show the second example of the training split
ds["train"][1]

{'id': 785950,
 'title': '中国人去年买走全球47%奢侈品消费总额6000多亿',
 'content': '中国人去年奢侈品消费总额为1020亿美元，合6000多亿元人民币，这也表明中国人买走了全球47%的奢侈品，是全球奢侈品市场无可争议的最大客户。知名奢侈品牌几乎已经全部进入中国。'}

In [8]:
# Download the pretrained model
from modelscope import snapshot_download
# model_dir = snapshot_download('langboat/mengzi-t5-base',revision='v1.0.0')
# Fetch the GLM checkpoint using 'model2' as the cache directory and print where it landed
model_dir = snapshot_download('ZhipuAI/glm-large-chinese',cache_dir='model2')
print(model_dir)

# #模型下载
# from modelscope import snapshot_download
# model_dir = snapshot_download('ZhipuAI/glm-large-chinese')

model2/ZhipuAI/glm-large-chinese


In [11]:
# Newer Transformers versions raise an error when loading this tokenizer; the remote source code has to be patched
# File location: ~/.cache\huggingface\modules\transformers_modules\THUDM\glm-large-chinese\230f54e413fab4bc8f29bd3508aab301d757ef3e\tokenization_glm.py
# Move line 231, super().__init__(**kwargs), down to line 235 so that it sits right after self.sp_model.Load(vocab_file)
tokenizer = AutoTokenizer.from_pretrained("model2/ZhipuAI/glm-large-chinese", trust_remote_code=True)
# tokenizer


In [15]:
def process_func(exmaples):
    """Turn a batch of articles and titles into GLM generation inputs.

    Relies on the notebook-level ``tokenizer``.

    Args:
        exmaples: batch mapping with a ``"content"`` list (source articles) and
            a ``"title"`` list (reference summaries).

    Returns:
        Whatever ``tokenizer.build_inputs_for_generation`` returns for the
        encoded prompts with the titles passed as ``targets``.
    """
    prompts = []
    for article in exmaples["content"]:
        # Prompt layout: task prefix, the article, then the mask token
        prompts.append("摘要生成: \n" + article + tokenizer.mask_token)

    encoded = tokenizer(
        prompts,
        max_length=256,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )
    return tokenizer.build_inputs_for_generation(
        encoded,
        targets=exmaples['title'],
        padding=True,
        max_gen_length=64,
    )

In [16]:
# Run process_func over both splits in batches and drop the original text columns
tokenized_ds = ds.map(process_func, batched=True, remove_columns=ds["train"].column_names)
tokenized_ds

Map:   0%|          | 0/4500 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'position_ids', 'attention_mask', 'labels'],
        num_rows: 4500
    })
    test: Dataset({
        features: ['input_ids', 'position_ids', 'attention_mask', 'labels'],
        num_rows: 500
    })
})

In [17]:
# Decode the first training example's input ids back to text to inspect the prompt layout
tokenizer.decode(tokenized_ds["train"][0]["input_ids"])

'[CLS] 摘要生成: 吴敬琏警示道,目前宏观与微观经济两方面正开始显现危机。吴敬琏指出,随着资产负债率的日益攀升,一些地区出现的危机很快传到其他地方,比如温州、苏南等地相继出现的“跑路”事件,包括6月的钱荒,其实都是给人们发预警信号。[MASK]<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|en

In [18]:
# Inspect the label ids of the first training example
tokenized_ds["train"][0]["labels"]

[-100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,
 -100,

In [19]:
# Print the position ids of the first training example
print(tokenized_ds["train"][0]["position_ids"])

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221

In [20]:
# Load the pretrained GLM model from the local download directory (remote model code is trusted)
model = AutoModelForSeq2SeqLM.from_pretrained("model2/ZhipuAI/glm-large-chinese", trust_remote_code=True)

In [21]:
# import torch
# from datasets import Dataset
# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainer, Seq2SeqTrainingArguments

# # 加载数据集
# ds = Dataset.load_from_disk("./nlpcc_2017/")
# ds = ds.train_test_split(100, seed=42)

# # 加载分词器
# tokenizer = AutoTokenizer.from_pretrained("model2/ZhipuAI/glm-large-chinese", trust_remote_code=True)

# # 数据预处理函数
# def process_func(examples):
#     contents = ["摘要生成: \n" + e + tokenizer.mask_token for e in examples["content"]]
#     inputs = tokenizer(contents, max_length=384, truncation=True, padding="max_length", return_tensors="pt")
#     inputs = tokenizer.build_inputs_for_generation(inputs, targets=examples['title'], padding=True, max_gen_length=64)
#     return inputs

# # 处理数据集
# tokenized_ds = ds.map(process_func, batched=True, remove_columns=ds["train"].column_names)

# # 加载模型
# model = AutoModelForSeq2SeqLM.from_pretrained("model2/ZhipuAI/glm-large-chinese", trust_remote_code=True)

# Configure the training arguments
# NOTE(review): the logged run below finished at global_step=140, which is below
# save_steps=512 — confirm the periodic checkpoint interval is what was intended.
args = Seq2SeqTrainingArguments(
    output_dir="./summary_glm",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=8,
    logging_steps=8,
    num_train_epochs=1,
    save_total_limit=1,  # keep only the most recent checkpoint
    save_steps=512,       # save a checkpoint every 512 steps
    # eval_strategy="steps",  # evaluation strategy
    load_best_model_at_end=True,  # load the best model when training ends
    eval_strategy="steps",
)

# args = Seq2SeqTrainingArguments(
#     output_dir="./summary_glm",
#     per_device_train_batch_size=4,
#     per_device_eval_batch_size=8,
#     gradient_accumulation_steps=8,
#     logging_steps=8,
#     num_train_epochs=1
# )

# Build the trainer, including an eval_dataset
trainer = Seq2SeqTrainer(
    args=args,
    model=model,
    train_dataset=tokenized_ds["train"],  # training split
    eval_dataset=tokenized_ds['test'],  # evaluation split
    tokenizer=tokenizer,
)

# trainer = Seq2SeqTrainer(
#     args=args,
#     model=model,
#     train_dataset=tokenized_ds["train"],
#     tokenizer=tokenizer,
# )
# Start training
trainer.train()

Detected kernel version 4.19.91, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


[2024-10-01 23:05:36,427] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


df: /root/.triton/autotune: 没有那个文件或目录




Step,Training Loss,Validation Loss
8,3.2005,2.899231
16,3.0754,2.816801
24,2.9051,2.798641
32,2.9223,2.805028
40,3.0185,2.776683
48,2.9373,2.763617
56,2.8229,2.764317
64,2.8317,2.754272
72,2.8438,2.747238
80,2.9022,2.735222


TrainOutput(global_step=140, training_loss=2.9211345400129045, metrics={'train_runtime': 1152.5461, 'train_samples_per_second': 3.904, 'train_steps_per_second': 0.121, 'total_flos': 3041186650521600.0, 'train_loss': 2.9211345400129045, 'epoch': 0.9955555555555555})

In [23]:
# # 保存模型和分词器
# trainer.save_model(args.output_dir)  # 保存模型
# tokenizer.save_pretrained(args.output_dir)  # 保存分词器

# Directory that receives both the model weights and the tokenizer files
path='./summary_glm/checkpoint-140'

# Save the model, then the tokenizer, into that same directory
model.save_pretrained(path)
tokenizer.save_pretrained(path)



('./summary_glm/checkpoint-140/tokenizer_config.json',
 './summary_glm/checkpoint-140/special_tokens_map.json',
 './summary_glm/checkpoint-140/cog-pretrain.model',
 './summary_glm/checkpoint-140/added_tokens.json')

In [4]:
# Load the fine-tuned model for inference

# NOTE(review): the save cell writes to './summary_glm/checkpoint-140' while this
# cell loads './checkpoint-140' — confirm the checkpoint really lives at this path.
path='./checkpoint-140'
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForSeq2SeqLM.from_pretrained(path,trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(path,trust_remote_code=True)
# Move the model onto the selected device
model.to(device)


GLMForConditionalGeneration(
  (glm): GLMModel(
    (word_embeddings): VocabEmbedding()
    (transformer): GLMStack(
      (embedding_dropout): Dropout(p=0.1, inplace=False)
      (position_embeddings): Embedding(1025, 1024)
      (block_position_embeddings): Embedding(1025, 1024)
      (layers): ModuleList(
        (0-23): 24 x GLMBlock(
          (input_layernorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (attention): SelfAttention(
            (query_key_value): Linear(in_features=1024, out_features=3072, bias=True)
            (attention_dropout): Dropout(p=0.1, inplace=False)
            (dense): Linear(in_features=1024, out_features=1024, bias=True)
            (output_dropout): Dropout(p=0.1, inplace=False)
          )
          (post_attention_layernorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (mlp): MLP(
            (dense_h_to_4h): Linear(in_features=1024, out_features=4096, bias=True)
            (dense_4h_to_h): Linear(in_fe

In [7]:
# with torch.no_grad():
#     output = model.generate(**inputs, max_new_tokens=64, eos_token_id=tokenizer.eop_token_id, do_sample=True)

# # 解码并输出摘要
# summary = tokenizer.decode(output[0].tolist(), skip_special_tokens=True)
# print("生成的摘要:", summary)


# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # 将模型移动到 GPU
# model.to(device)

# Build the summarization prompt for the first test article
input_text = ds["test"][0]["content"]
inputs = tokenizer("摘要生成: \n" + input_text + tokenizer.mask_token, return_tensors="pt")
inputs = tokenizer.build_inputs_for_generation(inputs, max_gen_length=64)
# inputs = inputs.to("cpu")
# Move every input tensor onto the same device as the model
inputs = {key: value.to(device) for key, value in inputs.items()}


# Sample at most 64 new tokens, using the tokenizer's eop token as the end-of-sequence id
output = model.generate(**inputs, max_new_tokens=64, eos_token_id=tokenizer.eop_token_id, do_sample=True)
tokenizer.decode(output[0].tolist())

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


'[CLS] 摘要生成: 乌拉圭足球队的路易斯·苏亚雷斯在24日的世界杯赛场上疑似咬了意大利队的球员乔治·基耶利尼,此事引起轩然大波。25日一名挪威男子未卜先知,竟以苏亚雷斯咬对方球员为赌注,最后赢得了916美元(约合人民币5708元)[MASK]<|endoftext|> <|startofpiece|> 乌拉圭男子世界杯上咬人事件影响深远 <|endofpiece|>'

In [8]:
import torch

# Switch the model to evaluation mode before running inference
model = model.eval()

def predict_test():
    """Generate one summary for every article in ``ds["test"]``.

    Reads the notebook-level ``ds``, ``tokenizer`` and ``model``. Each article
    is wrapped in the same prompt used for training, generation is sampled
    (``do_sample=True``, so results vary between runs), and the text between
    ``<|startofpiece|>`` and ``<|endofpiece|>`` is kept as the summary.

    Returns:
        list[str]: the generated summaries, in test-split order.
    """
    predictions = []
    # Follow the model's own device instead of assuming CUDA is available
    target_device = model.device
    with torch.inference_mode():
        for d in ds["test"]:
            inputs = tokenizer("摘要生成: \n" + d["content"] + tokenizer.mask_token, return_tensors="pt")
            inputs = tokenizer.build_inputs_for_generation(inputs, max_gen_length=64)
            inputs = inputs.to(target_device)
            output = model.generate(**inputs, max_new_tokens=64, eos_token_id=tokenizer.eop_token_id, do_sample=True)
            decoded = tokenizer.decode(output[0].tolist())
            # Keep only the generated piece and strip its closing marker
            predictions.append(decoded.split("<|startofpiece|>")[1].replace("<|endofpiece|>", "").strip())
            print("curID:", len(predictions))
    return predictions

In [9]:
# Generate summaries for the whole test split and display them
result = predict_test()
result

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 1


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 2


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


curID: 3
curID: 4


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 5


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 6


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 7


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 8


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 9


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 10
curID: 11


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 12


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 13
curID: 14


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 15


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 16


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 17


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 18


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 19


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 20
curID: 21


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 22


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 23


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 24


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 25


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 26


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 27


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 28


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 29


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 30


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 31


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 32
curID: 33


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 34
curID: 35


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 36
curID: 37


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 38


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 39


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 40


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 41


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 42


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 43
curID: 44


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 45


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 46


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 47


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 48
curID: 49


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 50


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 51


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 52


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 53


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 54


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 55
curID: 56


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 57


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 58


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 59
curID: 60


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 61
curID: 62


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 63


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 64


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 65
curID: 66


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 67


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 68


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 69


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 70


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 71


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 72


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 73


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 74
curID: 75


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 76


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 77


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 78


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 79
curID: 80


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 81


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 82
curID: 83


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 84


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 85


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 86


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 87


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 88


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 89


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 90


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 91


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 92
curID: 93


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 94


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 95


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 96


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 97


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 98


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


curID: 99
curID: 100


['乌拉圭队长咬人被扣5708,男子以他赢',
 '中国博彩大曝光:赌场色情交易竟如此暴利',
 '黑龙江庆安县在全市率先开展煤矿隐患排查行动',
 'iPad:环保明星,毁天灭地',
 '南方都市报:上缴红利90%返人民',
 '工地工人在永康漂满卫生巾,究竟是在大姨妈?',
 '北京市丰台区人民检察院批准逮捕一非法持有毒品案嫌犯',
 '央行“乌龙指”背后的监管“盲区”',
 '新模式开发商纷纷出海进军农业地产模式或成为农业发展新方向',
 '凯特王妃身穿小黑裙参加慈善晚宴',
 '南京博物馆发现宋代女尸',
 '2014年总结展望2015年再作精编',
 '深中学女生“黑丝上阵"引争议',
 '给狗注射毒镖?',
 '海峡两岸金融市场深度融合',
 '40岁男人盗窃电瓶成惯犯:打工赚点家用',
 '英国:希腊留欧并不会带来全球经济复苏',
 '南都:广深线2月1日起开行潮汕间跨线动车',
 '母女被保姆砍中面部多处伤索赔40万',
 '广州男子爬桥救援被封锁:一天让我堵2次车',
 '上海最贵商圈原来是地铁沿线',
 '德甲半程最能跑球员盘点:德里达成两队第一',
 '24国驻华外交官:全球化禁毒合作促禁毒事业发展',
 '梅嫩德兹拒绝嫖妓拒透露医生办公室搜查过程',
 '“国酒茅台”商标申请已获93件次异议',
 '莎拉波娃若遭禁喊叫可能难有昔日精彩',
 '央行防通胀信号难消弱通胀是重中之重',
 '《餐饮业经营管理办法》出台餐饮业再开罚',
 '32位企业行家里手揭秘电信企业管理真相',
 '天猫2亿补贴京东3C数码电商',
 '温州富翁自述在中国若想发财就必须靠这3点',
 '湖南长沙市长沙县建设文化文化站配齐图书室阅览室',
 '长江实业等概念股普涨',
 '沪深两市“重组荒”:有戏不搞继续留',
 '7年级下册教材将收录钓鱼岛等',
 '地产落子省城青啤地产再受追捧',
 '红牛:含禁添加剂已下架',
 '英伟达第四财季净利润1.93亿美元同比增长31%',
 '华盛顿“石油大王”毅然弃京从心追根寻源树神木',
 '大热美食“海鱼”遭检疫部门“扣押”',
 '监控拍走法拉利跑车闯9个红灯司机被判罚',
 '味精企业上调价格不足半月行业有望迎新一轮上行周期',
 '高考方案10月底前印发湖南高考改革有望年内完成',
 '超市超市,汉堡上市啦!',
 

In [10]:
from rouge_chinese import Rouge

# Score the generated summaries in `result` against the reference titles of
# the test split. Every string is expanded into space-separated characters
# before scoring, so the comparison is made character by character.
rouge = Rouge()

reference_titles = ds["test"]["title"]
docode_preds = list(map(" ".join, result))
decode_labels = list(map(" ".join, reference_titles))

# avg=True returns one averaged entry per metric instead of one per pair.
scores = rouge.get_scores(docode_preds, decode_labels, avg=True)

# Bare last expression: shows only the F-measure of each metric as cell output.
{metric: scores[metric]["f"] for metric in ("rouge-1", "rouge-2", "rouge-l")}

{'rouge-1': 0.2882684614718663,
 'rouge-2': 0.1517349899148715,
 'rouge-l': 0.23692376490333736}

In [100]:
# input_text=''
import jieba
from rouge_chinese import Rouge

# Summarise the last example of the test split and keep its reference title
# so the two can be compared afterwards.
last_example = ds["test"][-1]
input_text = last_example["content"]
title = last_example["title"]

# Prompt = instruction prefix + article + mask token; the tokenizer helper
# then prepares the tensors needed to generate up to 64 tokens for the mask.
inputs = tokenizer("摘要生成: \n" + input_text + tokenizer.mask_token, return_tensors="pt")
inputs = tokenizer.build_inputs_for_generation(inputs, max_gen_length=64)
# Move every input tensor to `device` before calling the model.
inputs = {key: value.to(device) for key, value in inputs.items()}

output = model.generate(**inputs, max_new_tokens=64, eos_token_id=tokenizer.eop_token_id, do_sample=True)

# Decode once and reuse the string for both the printout and the extraction.
text = tokenizer.decode(output[0].tolist())
print(text)

# The generated piece sits between these two markers in the decoded text.
start_marker = '<|startofpiece|>'
end_marker = '<|endofpiece|>'

# str.find returns -1 when a marker is absent. Handle that explicitly:
# a missing start marker means there is nothing to extract, and a missing end
# marker means the piece runs to the end of the text (previously the -1 was
# used as a slice bound and silently dropped the last character).
marker_pos = text.find(start_marker)
start_index = len(text) if marker_pos == -1 else marker_pos + len(start_marker)
end_index = text.find(end_marker, start_index)
if end_index == -1:
    end_index = len(text)

# `extracted_data` holds the generated summary without surrounding whitespace.
extracted_data = text[start_index:end_index].strip()

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[CLS] 摘要生成: “千年不大黄杨树”,这句俗语一方面反映了黄杨生长速度极其缓慢的植物特性,另一方面也反映了黄杨是一种很长寿的树种。不过,记者昨在扬州文昌中路459号发现,院内两棵百岁以上瓜子黄杨已不在——在枯死后被挖除。[MASK]<|endoftext|> <|startofpiece|> 扬州文昌中路两棵百岁以上瓜子黄杨不知去向 <|endofpiece|>


In [111]:

# # 定义预测和实际值
# result = title
# predict = extracted_data
# # 初始化 ROUGE 评分器
# rouge = Rouge()
# # 使用 jieba 进行分词
# result_tokens = " ".join(jieba.cut(result))  # 实际值分词
# predict_tokens = " ".join(jieba.cut(predict))  # 预测值分词

# print('\n原文title内容为:  '+title)
# print("\n")
# print('文本摘要内容为:  '+extracted_data)
# print("\n分词情况\n")
# print(result_tokens)
# print(predict_tokens)

# # 将实际值和预测值放入列表
# decode_preds = [predict_tokens]  # 预测值
# decode_labels = [result_tokens]   # 实际值

# # 计算 ROUGE 分数
# scores = rouge.get_scores(decode_preds, decode_labels, avg=True)

# # 提取 ROUGE 分数
# rouge_scores = {
#     "rouge-1": scores["rouge-1"]["f"],
#     "rouge-2": scores["rouge-2"]["f"],
#     "rouge-l": scores["rouge-l"]["f"],
# }
# # 输出 ROUGE 分数
# print("\n准确率情况\n")
# print(rouge_scores)


# import jieba
# from rouge import Rouge
from bert_score import score


def calculate_bert_score(predict, result):
    """Return the BERTScore F1 for a single candidate/reference pair.

    Args:
        predict: Candidate (generated) text.
        result: Reference text to compare against.

    Returns:
        The mean of the F1 values returned by ``score``, converted to a plain
        Python number with ``.item()``.
    """
    # `score` takes parallel lists, so the single pair is wrapped in lists.
    candidates = [predict]
    references = [result]
    _precision, _recall, f1 = score(candidates, references, lang='zh')
    return f1.mean().item()

# Smoke-test the BERTScore helper on a fixed sentence pair.
# Dedicated names are used on purpose: binding the demo strings to `result`
# would overwrite the notebook-level `result` list that the ROUGE cell scores,
# and re-running that cell afterwards would then score single characters.
example_prediction = "这是一个示例预测值"
example_reference = "这是一个示例实际值"

bert_score_result = calculate_bert_score(example_prediction, example_reference)
print("BERTScore值为:", bert_score_result)


# result = title
# predict = extracted_data
# rouge = Rouge()

# result_tokens = " ".join(jieba.cut(result))
# predict_tokens = " ".join(jieba.cut(predict))

# decode_preds = [predict_tokens]
# decode_labels = [result_tokens]

# scores = rouge.get_scores(decode_preds, decode_labels, avg=True)

# rouge_scores = {
#     "rouge-1": scores["rouge-1"]["f"],
#     "rouge-2": scores["rouge-2"]["f"],
#     "rouge-l": scores["rouge-l"]["f"],
# }

# P, R, F1 = score([predict_tokens], [result_tokens], lang='zh')
# bert_score = F1.mean().item()

# print("\nROUGE 分数：")
# print(rouge_scores)
# print("\nBERTScore：", bert_score)

OSError: We couldn't connect to 'https://huggingface.co' to load this file, couldn't find it in the cached files and it looks like bert-base-chinese is not the path to a directory containing a file named config.json.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [83]:
def generate_summary(input_text, tokenizer=tokenizer, model=model, device=device):
    """Generate a summary for ``input_text`` and return it as plain text.

    The prompt is the instruction prefix, the article and the tokenizer's mask
    token. The model samples (``do_sample=True``) at most 64 new tokens, and
    the text between the ``<|startofpiece|>`` and ``<|endofpiece|>`` markers
    of the decoded output is returned.

    Args:
        input_text: Article text to summarise.
        tokenizer: Tokenizer used to build the prompt and decode the output.
            Defaults to the notebook-level ``tokenizer`` that exists when this
            cell is executed.
        model: Model whose ``generate`` method is called. Defaults to the
            notebook-level ``model`` that exists when this cell is executed.
        device: Device the input tensors are moved to before generation.

    Returns:
        The text between the two markers with surrounding whitespace removed.
        If the end marker is missing, everything after the start marker is
        returned. If the start marker is missing, an empty string is returned.
    """
    # Build the prompt and let the tokenizer prepare the generation inputs.
    inputs = tokenizer("摘要生成: \n" + input_text + tokenizer.mask_token, return_tensors="pt")
    inputs = tokenizer.build_inputs_for_generation(inputs, max_gen_length=64)

    # Move every input tensor to `device` before calling the model.
    inputs = {key: value.to(device) for key, value in inputs.items()}

    output = model.generate(**inputs, max_new_tokens=64, eos_token_id=tokenizer.eop_token_id, do_sample=True)
    text = tokenizer.decode(output[0].tolist())

    start_marker = '<|startofpiece|>'
    end_marker = '<|endofpiece|>'

    # str.partition reports whether the separator was found, which avoids the
    # -1 that str.find returns for a missing marker (used as a slice bound it
    # silently cut off the last character or produced a wrong start offset).
    _, found_start, generated = text.partition(start_marker)
    if not found_start:
        return ""

    # With no end marker, partition leaves the whole remainder in `summary`.
    summary, _, _ = generated.partition(end_marker)
    return summary.strip()

# input_text = ds["test"][5]["content"]
# generate_summary(input_text)


# 示例用法
# input_text = ds["test"][5]["content"]
# summary = generate_summary(input_text, tokenizer, model, device)
# print('文本摘要内容为: ' + summary)

In [89]:
from ipywidgets import Button, Textarea, Output, HBox, VBox

# 定义一个简单的函数
def greet(b):
    """Click handler: summarise the text box content and show the result.

    Reads the current value of ``input_widget``. If it is empty or contains
    only whitespace, a prompt asking for input is shown; otherwise the text is
    passed unchanged to ``generate_summary``. The previous content of
    ``output_widget`` is cleared before the new text is printed into it.

    Args:
        b: Object passed by the click event; not used.
    """
    input_text = input_widget.value
    # Whitespace-only input carries nothing to summarise, so treat it as empty
    # instead of sending it to the model.
    if input_text.strip():
        text = generate_summary(input_text)
    else:
        text = '请输入一些内容。'

    output_widget.clear_output()
    with output_widget:
        print(text)

# Large multi-line text box for the article to summarise.
input_widget = Textarea(
    description='文本输入:',
    placeholder='输入你的文本...',
    layout=dict(width='600px', height='300px'),  # fixed width and height
    style=dict(overflow='auto'),  # intended to allow scrolling
)

# Output area that the click handler prints into.
output_widget = Output()

# Button that triggers the summary; `greet` is registered as its click handler.
submit_button = Button(description="生成摘要")
submit_button.on_click(greet)

# Optional spacing tweaks (margin order: top right bottom left):
# input_widget.layout.margin = '30px 30px 30px 30px'
# submit_button.layout.margin = '0 0 30px 0'

# Stack the three widgets vertically.
ui = VBox(children=[input_widget, submit_button, output_widget])

# Bare last expression so the notebook renders the UI.
ui

VBox(children=(Textarea(value='', description='文本输入:', layout=Layout(height='300px', width='600px'), placehold…