In [2]:

from bert4torch.models import build_transformer_model
from bert4torch.snippets import sequence_padding, text_segmentate
import torch.nn as nn
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
import torch
from bert4torch.models import build_transformer_model, BaseModel
from bert4torch.snippets import ListDataset
from bert4torch.generation import SeqGeneration
from bert4torch.callbacks import Callback, Logger
from bert4torch.optimizers import get_linear_schedule_with_warmup
from transformers import AutoTokenizer
import json
import jieba 
from rouge_chinese import Rouge
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import numpy as np
from tqdm import tqdm
from peft import LoraConfig#, prepare_model_for_kbit_training  # 需要pip install git+https://github.com/huggingface/peft.git


In [3]:
# Basic hyper-parameters for the run.
mode = 'train'

# Sequence-length budget: prompt tokens + answer tokens.
max_source_length = 1250
max_target_length = 200
max_seq_length = max_source_length + max_target_length

# Optimisation settings.
lr = 5e-4
epochs = 1
steps_per_epoch = 3000
batch_size = 16  # tune to the available GPU memory
grad_accumulation_steps = 1  # tune to the available GPU memory
eval_batch_size = 4
ignore_pad_token_for_loss = True

# Dataset field / prompt settings.
prefix = ''
prompt_column = 'content'
response_column = 'summary'
history_column = None

In [4]:
# Model configuration: where the converted bert4torch weights live and which
# device training runs on.
dir_path = "/root/autodl-tmp/chatglm2-int4"
config_path = f'{dir_path}/bert4torch_config.json'
# Either a single checkpoint file or a list of shards can be given, e.g.
# checkpoint_path = [dir_path + f'\\bert4torch_pytorch_model_{i}.bin' for i in range(1,8)]
checkpoint_path = f'{dir_path}/bert4torch_pytorch_model.bin'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# The tokenizer is loaded from the same directory; trust_remote_code=True lets
# transformers execute the tokenizer code shipped alongside the model files.
tokenizer = AutoTokenizer.from_pretrained(dir_path, trust_remote_code=True)


Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.


In [5]:
# 加载数据集
class MyDataset(ListDataset):
    """JSON-lines SFT dataset yielding ``(prompt, answer)`` pairs."""

    @staticmethod
    def load_data(filename):
        """Read a JSON-lines file and build one ``(prompt, answer)`` pair per record.

        Each record must carry ``task_type``, ``instruction`` (an iterable of
        strings, joined with ``;``), ``question`` and ``answer``.

        Args:
            filename: path of the UTF-8 encoded JSON-lines file.

        Returns:
            list of ``(prompt, answer)`` tuples in file order.

        Raises:
            ValueError: if a record has a ``task_type`` other than
                ``table_extract`` or ``tuple_extract``.
        """
        D = []
        with open(filename, encoding='utf-8') as f:
            for line_no, raw in enumerate(f, start=1):
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                if not raw.strip():
                    continue
                record = json.loads(raw)
                task_type = record["task_type"]
                instruction = ";".join(record["instruction"])
                if task_type == "table_extract":
                    prompt = generate_prompt_QA(instruction, record["question"])
                elif task_type == "tuple_extract":
                    prompt = generate_prompt_QuerySummary(instruction, record["question"])
                else:
                    # Previously an unknown type left `prompt` unbound (or reused
                    # the prompt of the previous record); fail loudly instead.
                    raise ValueError(
                        f"{filename}:{line_no}: unsupported task_type {task_type!r}"
                    )
                D.append((prompt, record["answer"]))

        return D

# Table-information extraction prompt
def generate_prompt_QA(related_text, query: str, preprompt="") -> str:
    """Build the table question-answering prompt.

    Args:
        related_text: table content inserted at the ``{context}`` slot.
        query: user question inserted at the ``{question}`` slot.
        preprompt: optional text prepended to the filled-in template.

    Returns:
        The complete prompt string.
    """
    prompt_template = """基于以下表格信息，来回答用户的问题。
        如果无法从中得到答案，请说 "没有找到该问题对应的知识"，不允许在答案中添加编造成分，答案请使用中文。
        已知内容:
        {context}
        问题:
        '{question}'"""

    # Fill both slots in a single pass. Chained str.replace calls re-scan the
    # text that was just inserted, so a question containing the literal
    # "{context}" used to be overwritten with the table content.
    prompt = preprompt + prompt_template.format(context=related_text, question=query)
    return prompt

def generate_prompt_QuerySummary(related_text, query: str, preprompt="") -> str:
    """Build the prompt asking for the subject/object pair of a question.

    Args:
        related_text: accepted for signature parity with ``generate_prompt_QA``;
            the template has no ``{context}`` slot, so it is not used.
        query: question inserted at the ``{question}`` slot.
        preprompt: optional text prepended to the filled-in template.

    Returns:
        The complete prompt string.
    """
    prompt_template = """已知问题:
        {question}，请提取问题的主宾二元组，格式：xx|||xx"""

    # Only the question slot exists. The former extra replace of "{context}"
    # could only ever fire on text coming from the question itself, silently
    # swapping it for `related_text`.
    prompt = preprompt + prompt_template.format(question=query)
    return prompt

def collate_train_fn(batch):
    """Collate ``(prompt, answer)`` pairs into padded training tensors.

    For every sample the input is ``prompt_ids + answer_ids + [eos]``; the
    label sequence has the same length, with the prompt positions filled with
    the pad id. Both are padded with the pad id and moved to ``device``.

    Returns:
        ``([token_ids], labels)`` where both tensors are ``torch.long``.
    """
    pad_id = tokenizer.pad_token_id
    eos_id = tokenizer.eos_token_id

    def _as_tensor(sequences):
        # Pad the ragged id lists to a common length, then move them to the device.
        return torch.tensor(sequence_padding(sequences, value=pad_id), dtype=torch.long, device=device)

    all_input_ids, all_labels = [], []
    for prompt, answer in batch:
        prompt_ids = tokenizer.encode(text=prompt, add_special_tokens=True, truncation=True, max_length=max_source_length)
        answer_ids = tokenizer.encode(text=answer, add_special_tokens=False, truncation=True, max_length=max_target_length)

        target_ids = answer_ids + [eos_id]
        all_input_ids.append(prompt_ids + target_ids)
        # Prompt positions carry the pad id in the label sequence.
        all_labels.append([pad_id] * len(prompt_ids) + target_ids)

    batch_token_ids = _as_tensor(all_input_ids)
    batch_labels = _as_tensor(all_labels)
    return [batch_token_ids], batch_labels

def collate_dev_fn(batch):
    """Collate ``(prompt, answer)`` pairs for evaluation.

    Prompts are passed through as raw text. Each answer is tokenised (truncated
    to ``max_target_length``) and decoded again without special tokens.

    Returns:
        ``(prompts, references)`` — two lists of strings of equal length.
    """
    def _round_trip(answer):
        # Encode as a target text, then decode without special tokens.
        encoded = tokenizer(text_target=answer, max_length=max_target_length, truncation=True)
        return tokenizer.decode(encoded['input_ids'], skip_special_tokens=True)

    prompts, references = [], []
    for prompt, answer in batch:
        prompts.append(prompt)
        references.append(_round_trip(answer))
    return prompts, references

In [6]:
# Training batches are shuffled and collated into padded tensors.
train_dataloader = DataLoader(
    MyDataset('../data/train_sft.json'),
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_train_fn,
)
# Dev batches keep file order and stay as raw text for generation.
dev_dataloader = DataLoader(
    MyDataset('../data/dev_sft.json'),
    batch_size=eval_batch_size,
    shuffle=False,
    collate_fn=collate_dev_fn,
)

In [8]:
# Build the model and load the pretrained weights (cast to fp16).
# NOTE(review): the recorded training run ended with
# "RuntimeError: self and mat2 must have the same dtype"; presumably the fp16
# base weights and the LoRA adapter weights ended up in different dtypes —
# confirm before relying on this cell.
model = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, model='glm2', add_trainer=True, 
                                tie_emb_prj_weight=True, # tie the embedding and dense/lm_head weights (transformers ties them too)
                                ).half()

# Quantization: set to 8 or 4 to enable, None to disable (the branches below
# compare against those integers, so True does not select either).
# Author's note: with quantization enabled the loss decreased normally on a
# 3060 card but was NaN on a V100.
load_in_nbit = None
if load_in_nbit == 8:
    model.gradient_checkpointing_enable()
    model.enable_input_require_grads()

    class CastOutputToFloat(nn.Sequential):
        """Wrap a module so that its output is returned as float32."""
        def forward(self, x):
            return super().forward(x).to(torch.float32)
    model = model.quantize(quantization_method='load_in_8bit', llm_int8_skip_modules=['model.embeddings.word_embeddings', 'lm_head']) # up to and including v3.0.0 use 'dense' instead of 'lm_head'
    # model.dense = CastOutputToFloat(model.dense)  # for v3.0.0 and earlier
    model.lm_head = CastOutputToFloat(model.lm_head)
    
elif load_in_nbit == 4:
    from transformers import BitsAndBytesConfig
    # Imported here because the top-of-file import of this helper is commented
    # out; without it this branch raised NameError.
    from peft import prepare_model_for_kbit_training
    q_config = BitsAndBytesConfig(load_in_4bit=True,
                                bnb_4bit_quant_type='nf4',
                                bnb_4bit_use_double_quant=True,
                                bnb_4bit_compute_dtype=torch.float16,  # alternatives: torch.float32, torch.float16, torch.bfloat16
                                llm_int8_skip_modules=['model.embeddings.word_embeddings', 'lm_head']  # up to and including v3.0.0 use 'dense' instead of 'lm_head'
                                )
    model = model.quantize(quantization_method='load_in_4bit', quantization_config=q_config)
    model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

# LoRA adapters on the attention q/k/v projections.
peft_config = LoraConfig(
        inference_mode=False,
        r=8,
        lora_alpha=32,
        lora_dropout=0.1,
        target_modules=['q', 'k', 'v']
    )
model = model.get_peft_model(peft_config).to(device)

Quantize linear layers: 100%|██████████| 168/168 [00:08<00:00, 19.04it/s]


[0;32m[INFO][0m trainable params: 3784704 || all params: 3126754304 || trainable%: 0.12104257744710856


In [7]:
# Display the module tree of the LoRA-wrapped model.
# NOTE(review): this cell's execution count (7) is lower than that of the
# model-building cell (8), so the cells were presumably run out of order —
# confirm the notebook survives Restart Kernel -> Run All.
model

LoraModel(
  (model): BertBaseModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(65024, 4096, padding_idx=0)
      (dropout): Dropout(p=0, inplace=False)
    )
    (decoderLayer): ModuleList(
      (0-27): 28 x GLMBlock(
        (multiHeadAttention): MultiHeadAttentionLayer(
          (q): Linear(
            in_features=4096, out_features=4096, bias=True
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.1, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=4096, out_features=8, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=8, out_features=4096, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
          )
          (k): Linear(
            in_features=4096, out_features=256, bias=True
            (lora_dropout): ModuleDict(
              (default): Dr

In [9]:
class CrossEntropyLoss(nn.CrossEntropyLoss):
    """Next-token cross-entropy for causal language-model fine-tuning.

    Constructor arguments are those of ``torch.nn.CrossEntropyLoss``
    (e.g. ``ignore_index``).
    """

    def forward(self, logits, labels):
        '''Compute the shifted language-model loss.

        logits: [btz, seq_len, vocab_size]
        labels: token_ids: [btz, seq_len]

        Returns a scalar loss cast back to the dtype of the incoming logits.
        '''
        raw_dtype = logits.dtype
        # Do the softmax/log arithmetic in fp32 regardless of the model dtype.
        logits = logits.to(torch.float32)
        # Shift by one position: the logits at step t are scored against the
        # token at step t + 1.
        logits = logits[:, :-1, :].contiguous()
        labels = labels[:, 1:].contiguous()

        logits = logits.reshape(-1, logits.shape[-1])
        labels = labels.flatten()
        loss = super().forward(logits, labels)

        return loss.to(raw_dtype)

In [10]:
# Optimiser, learning-rate schedule and trainer wiring.
optimizer = optim.AdamW(model.parameters(), lr)
# For torch4keras<0.0.8 the last argument must instead be
# (steps_per_epoch*epochs)//grad_accumulation_steps.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    0,
    steps_per_epoch * epochs,
)
# The pad id is used as ignore_index, matching the pad-id mask that
# collate_train_fn writes over the prompt positions of the labels.
model.compile(
    loss=CrossEntropyLoss(ignore_index=tokenizer.pad_token_id),
    optimizer=optimizer,
    scheduler=scheduler,
    grad_accumulation_steps=grad_accumulation_steps,
    clip_grad_norm=1.0,
)

class Chat(SeqGeneration):
    """Generation wrapper that tokenises prompts and decodes generated ids."""

    def pre_process(self, text):
        """Tokenise ``text`` (truncated to ``max_source_length``) and wrap the ids in a list."""
        encoded = tokenizer(text, max_length=max_source_length, truncation=True)
        return [encoded['input_ids']]

    def post_process(self, output_ids):
        """Decode every generated id tensor back into a string."""
        decoded = []
        for ids in output_ids:
            decoded.append(tokenizer.decode(ids.cpu().numpy()))
        return decoded
# Sampling-based generator used for dev-set evaluation.
generation = Chat(
    model,
    tokenizer,
    start_id=None,
    end_id=tokenizer.eos_token_id,
    pad_id=tokenizer.pad_token_id,
    mode='random_sample',
    maxlen=512,
    default_rtype='logits',
    use_states=True,
)

In [11]:
def evaluate(data, epoch='final'):
    """Generate answers for ``data`` and score them by exact string match.

    Every ``(pred, label)`` pair is also appended as one JSON line to
    ``./chatglm2_int4_lora_preds_{epoch}.txt``.

    Args:
        data: iterable of ``(prompts, labels)`` batches.
        epoch: tag used in the name of the prediction dump file.

    Returns:
        ``{"accuracy": float}`` rounded to 5 decimals; ``0.0`` when ``data``
        yields no samples (this used to raise ZeroDivisionError).
    """
    preds, labels = [], []
    # Open the dump file once instead of once per batch; it is flushed after
    # every batch so partial results still reach disk during a long run.
    with open(f'./chatglm2_int4_lora_preds_{epoch}.txt', 'a', encoding='utf-8') as f:
        for prompt, label in tqdm(data, desc='Evaluating'):
            pred = generation.batch_generate(prompt, topk=50, topp=0.7, temperature=0.95)
            preds.extend(pred)
            labels.extend(label)
            for pred_i, label_i in zip(pred, label):
                f.write(json.dumps({'pred': pred_i, 'label': label_i}, ensure_ascii=False) + '\n')
            f.flush()

    pairs = list(zip(preds, labels))
    if not pairs:
        return {"accuracy": 0.0}
    hits = sum(1 for pred, label in pairs if pred == label)
    return {"accuracy": round(hits / len(pairs), 5)}
class Evaluator(Callback):
    """Evaluate on the dev set after each epoch and save the best weights."""

    def __init__(self):
        # Initialise the base Callback too, so whatever state it sets up exists.
        super().__init__()
        # Best dev accuracy seen so far.
        self.best = 0

    def on_epoch_end(self, steps, epoch, logs=None):
        """Score the dev set and save the trainable weights on improvement."""
        # Pass the epoch through so each epoch writes its own prediction file
        # rather than all of them appending to the shared "final" one.
        score_dict = evaluate(dev_dataloader, epoch=epoch)
        if score_dict["accuracy"] > self.best:
            self.best = score_dict["accuracy"]
            model.save_weights(f'chatglm2_int4_lora/model_{self.best}.pt', trainable_only=True)
    
    


In [12]:

# Callbacks: per-epoch dev evaluation and a file logger.
evaluator = Evaluator()
logger = Logger('./chatglm2_int4_lora.log', interval=100)

model.fit(
    train_dataloader,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    callbacks=[evaluator, logger],
)
# To re-score the dev set after training:
# score_dict = evaluate(dev_dataloader)



2023-09-27 18:31:26 - Start Training

2023-09-27 18:31:26 - Epoch: 1/1


RuntimeError: self and mat2 must have the same dtype