In [None]:
from transformers import AutoModelForSeq2SeqLM,AutoTokenizer , T5ForConditionalGeneration,Seq2SeqTrainer,Seq2SeqTrainingArguments
from transformers.models.t5.modeling_t5 import T5LayerSelfAttention , T5Attention

In [None]:
from TALib import TALib

In [None]:
# Load the tokenizer and the seq2seq model to be pruned. Both identifiers come
# from TALib (TK_ckpt for the tokenizer, CHECKPOINT for the model weights).
tokenizer = AutoTokenizer.from_pretrained(TALib.TK_ckpt)
model = AutoModelForSeq2SeqLM.from_pretrained(TALib.CHECKPOINT)

In [None]:
# Display the module structure of the freshly loaded (unpruned) model.
model

In [None]:
import torch_pruning as tp

In [None]:
# Example encoder/decoder token ids; they are only used as a dummy batch for
# the forward pass and for the pruner.
input_text = "translate English to French: Hello, how are you?"
decoder_input_ids = tokenizer("Ce message est une traduction :", return_tensors="pt")["input_ids"]
input_ids = tokenizer(input_text, return_tensors="pt")["input_ids"]

In [None]:
# Keyword arguments for a single example forward pass through the model.
dummy_input = dict(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

In [None]:
# Inspect the encoder/decoder id tensors that make up the dummy batch.
dummy_input

In [None]:
# Run one forward pass with the dummy batch before pruning; the model output is
# shown as the cell result.
model(**dummy_input)

In [None]:
# Magnitude-based importance criterion (p=2, "mean" group reduction) handed to the pruner below.
imp = tp.importance.MagnitudeImportance(p=2, group_reduction="mean")
# base_macs, base_params = tp.utils.count_ops_and_params(model, dummy_input)
# Filled by the next cell: maps each attention q/k/v projection layer to its head count.
num_heads = {}

In [None]:
# Register the head count of every T5 attention block, keyed by its query, key
# and value projection layers, so the pruner knows how those layers split into heads.
for module in model.modules():
    if not isinstance(module, T5Attention):
        continue
    for projection in (module.q, module.k, module.v):
        num_heads[projection] = module.n_heads

In [None]:
# Configure structured pruning of the model. `dummy_input` is the example batch
# the pruner uses to run the model; `num_heads` tells it how the q/k/v
# projections are split into attention heads.
pruner = tp.pruner.MetaPruner(
    model,
    dummy_input,
    global_pruning=False,  # If False, a uniform pruning ratio will be assigned to different layers.
    importance=imp,  # importance criterion for parameter selection
    iterative_steps=1,  # the number of iterations to achieve target pruning ratio
    pruning_ratio=0.7,
    num_heads=num_heads,
    prune_head_dims=False,
    prune_num_heads=True,
    head_pruning_ratio=0.5,
    # Never prune the output head: its output channels are the vocabulary
    # logits, so removing any of them would break generation/decoding.
    ignored_layers=[model.get_output_embeddings()],
)

# Interactive mode yields each pruning group; apply every one of them.
for group in pruner.step(interactive=True):
    group.prune()

# The pruner shrinks the projection layers but does not touch the bookkeeping
# attributes that T5Attention uses to reshape tensors in forward(). Sync them
# with the pruned layers, as the torch_pruning transformer examples do.
for attention in model.modules():
    if isinstance(attention, T5Attention):
        attention.n_heads = pruner.num_heads[attention.q]
        attention.inner_dim = attention.q.out_features
        attention.key_value_proj_dim = attention.inner_dim // attention.n_heads


In [None]:
# Report parameter statistics for the pruned model via the TALib helper
# (presumably a ratio against the unpruned checkpoint — confirm in TALib).
TALib.show_param_ratio(model)

In [None]:
# Display the module structure again to inspect the layer sizes after pruning.
model

# testing

In [None]:

from datasets import load_dataset

# Load the "train" split of the billsum dataset.
billsum = load_dataset("billsum", split="train")

In [None]:
# Build the preprocessing callable from the tokenizer via the TALib helper;
# it is applied to the datasets with `map(..., batched=True)` below.
preprocess_function = TALib.preprocess_function_pass_tokenizer(tokenizer)

In [None]:
# Hold out 20% of the training data for evaluation. The split is seeded (same
# seed as the training arguments) so that re-running the notebook yields the
# same train/eval partition and therefore comparable metrics.
# NOTE: `billsum` is rebound from a Dataset to the split result, so this cell is
# meant to run once right after the load cell.
billsum = billsum.train_test_split(test_size=0.2, seed=42)
tokenized_billsum = billsum.map(preprocess_function, batched=True)

In [None]:
from transformers import DataCollatorForSeq2Seq

# Pass the model object rather than the checkpoint identifier: the collator
# only prepares `decoder_input_ids` from the labels when `model` exposes
# `prepare_decoder_input_ids_from_labels`, which a plain string never does.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [None]:
import evaluate

# Load the ROUGE metric. `rouge` is not referenced again in the visible cells;
# the metric callback used by the trainers is built by TALib instead.
rouge = evaluate.load("rouge")

In [None]:
# Metric callback built by TALib from the tokenizer.
compute_metrics = TALib.compute_metrics_pass_tokenizer(tokenizer)

# Arguments for the trainer that evaluates the pruned model in the following cells.
training_args = Seq2SeqTrainingArguments(
    output_dir="TA_billsum_model",
    seed=42,
    # Optimisation settings.
    learning_rate=2e-5,
    weight_decay=0.01,
    lr_scheduler_type="linear",
    num_train_epochs=4,
    fp16=True,  # mixed-precision training
    # Batch sizes per device.
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    # Evaluation, logging and checkpoint retention.
    evaluation_strategy="epoch",
    predict_with_generate=True,
    logging_steps=10,
    save_total_limit=3,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_billsum["train"],
    eval_dataset=tokenized_billsum["test"],
)

In [None]:
# Load the official billsum "test" split and run it through the same
# preprocessing callable as the training data.
billsum_test = load_dataset("billsum", split="test")
tokenized_billsum_test = billsum_test.map(preprocess_function, batched=True)

In [None]:
# Evaluate the current model on the tokenized test split; the metrics are shown as the cell result.
trainer.evaluate(tokenized_billsum_test)

In [None]:
# Run prediction on the tokenized test split; element 0 of the result is decoded in the next cell.
results = trainer.predict(tokenized_billsum_test)

In [None]:
# The trainer may pad the gathered prediction array with -100, which is not a
# valid token id and cannot be decoded. Swap it for the pad token (removed by
# skip_special_tokens) on a copy so `results` itself stays untouched.
generated_ids = results[0].copy()
generated_ids[generated_ids == -100] = tokenizer.pad_token_id
decoded_prediction = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)


In [None]:
# Write the decoded predictions to a CSV through the TALib helper.
# NOTE(review): the file name says "0.3" while the pruner above is configured
# with pruning_ratio=0.7 / head_pruning_ratio=0.5 — confirm the intended label.
TALib.dump_to_kaggle_format(decoded_prediction , 'pruned_model_0.3_real_torch_pruning.csv')

In [None]:
# Score the decoded predictions against the (untokenized) test split with the TALib helper.
final_score = TALib.run_score(predict=decoded_prediction,label=billsum_test)

In [None]:
# Show the score of the pruned model before any fine-tuning.
print(final_score)

In [None]:
# Per-device batch size used for both training and evaluation in the fine-tuning run below.
batch_size = 15

In [None]:
# Metric callback built by TALib from the tokenizer.
compute_metrics = TALib.compute_metrics_pass_tokenizer(tokenizer)

# One-epoch fine-tuning run for the pruned model; this rebinds `training_args`
# and `trainer` from the evaluation-only setup above.
training_args = Seq2SeqTrainingArguments(
    output_dir="./output/pruning",
    seed=42,
    # Optimisation settings.
    num_train_epochs=1,
    learning_rate=2e-5,
    lr_scheduler_type="linear",
    weight_decay=0.01,
    fp16=True,  # mixed-precision training
    # Batch sizes per device.
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Evaluation, logging and checkpoint retention.
    evaluation_strategy="epoch",
    predict_with_generate=True,
    logging_steps=10,
    save_total_limit=3,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_billsum["train"],
    eval_dataset=tokenized_billsum["test"],
)

In [None]:
# Display the model structure once more before fine-tuning starts.
model

In [None]:
# Fine-tune the pruned model with the trainer configured above.
trainer.train()

In [None]:
# Persist the fine-tuned, pruned model through the TALib helper.
# NOTE(review): the path says "0.3" while the pruner is configured with
# pruning_ratio=0.7 — confirm the intended label.
TALib.save_model(model , "output/try_pruning_0.3_torch_pruning")

In [None]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch.nn.utils.prune as prune

# Standalone experiment with PyTorch's built-in mask-based pruning.
# NOTE: this cell rebinds the notebook-level `tokenizer`, `model`, `input_text`
# and `input_ids`, replacing the objects used by the earlier cells.

# Load the pretrained T5 model and tokenizer
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Build a sample input
input_text = "Translate English to French: How are you?"

# Encode the input with the tokenizer
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

# Parameter count before pruning
print(f"模型参数量：{sum(p.numel() for p in model.parameters() if p.requires_grad)}")

# Prune the q/k/v/o projections of the first encoder self-attention layer:
# L2-norm structured pruning that masks half of each weight along dim 0.
first_self_attention = model.encoder.block[0].layer[0].SelfAttention
pruned_projections = [
    first_self_attention.q,
    first_self_attention.k,
    first_self_attention.v,
    first_self_attention.o,
]
for projection in pruned_projections:
    prune.ln_structured(projection, name="weight", amount=0.5, n=2, dim=0)

# torch.nn.utils.prune only attaches a mask (weight_orig + weight_mask), so the
# number of trainable elements is unchanged by the calls above. Fold the masks
# into the weights so the zeroed entries are really part of `weight`.
for projection in pruned_projections:
    prune.remove(projection, "weight")

# After pruning, report the number of remaining non-zero weights; the raw
# element count would be identical to the number printed before pruning.
print(f"剪枝后模型参数量：{sum(int(torch.count_nonzero(p)) for p in model.parameters() if p.requires_grad)}")


In [None]:
# Report parameter statistics via the TALib helper. At this point `model` is
# the t5-small instance created in the previous cell, not the earlier pruned model.
TALib.show_param_ratio(model)