In [None]:
from modelscope import AutoModelForCausalLM, AutoTokenizer
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using device: {device}')

model = AutoModelForCausalLM.from_pretrained(
    'Qwen/Qwen2.5-3B-Instruct',
    device_map='auto',
    torch_dtype='auto',)
tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2.5-3B-Instruct')

In [3]:
# check if the model and tokenizer are loaded correctly
# print(model)
# print(tokenizer)

In [4]:
def chat(prompt: str):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(messages, 
                                         tokenize=False,
                                         add_generation_prompt=True,)
    # print(text)

    model_inputs = tokenizer([text], return_tensors='pt').to(device)
    generated_ids = model.generate(
        model_inputs.input_ids,
        max_new_tokens=512,)
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response

response = chat("What is the capital of France?")
print(response)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


The capital of France is Paris.


In [5]:
def preprocess(tokenizer, batch_messages):
    input_ls, target_ls = list(), list()

    im_start_id = tokenizer('<|im_start|>').input_ids
    im_end_id = tokenizer('<|im_end|>').input_ids
    newline_id = tokenizer('\n').input_ids
    padding_id = tokenizer('<|endoftext|>').input_ids
    ignore = [-100]

    for messages in batch_messages:
        input_ids, target_ids = list(), list()
        for msg in messages:
            role_id = tokenizer(msg['role']).input_ids
            content_id = tokenizer(msg['content']).input_ids
            if msg['role'] in ['system', 'user']:
                ignore_parts = role_id + content_id + newline_id
                input_ids.extend(im_start_id + ignore_parts + im_end_id + newline_id)
                target_ids.extend(im_start_id + ignore * len(ignore_parts) + im_end_id + newline_id)
            else:
                ignore_parts = role_id + newline_id
                input_ids.extend(im_start_id + ignore_parts + content_id + im_end_id + newline_id)
                target_ids.extend(im_start_id + ignore * len(ignore_parts) + content_id + im_end_id + newline_id)
        input_ls.append(input_ids)
        target_ls.append(target_ids)
    
    # padding
    max_len = max(len(input_ids) for input_ids in input_ls)
    for input_ids, target_ids in zip(input_ls, target_ls):
        input_ids.extend(padding_id * (max_len - len(input_ids)))
        target_ids.extend(ignore * (max_len - len(target_ids)))
    batch_input_idx = torch.tensor(input_ls, dtype=torch.long)
    batch_target_idx = torch.tensor(target_ls, dtype=torch.long)
    batch_mask = batch_input_idx.ne(padding_id[0]).type(torch.long)  # padding mask
    return batch_input_idx, batch_target_idx, batch_mask


In [None]:
prompt = '3+3等于几'
messages = [[{"role": "system", "content": "You are a helpful assistant."},
             {"role": "user", "content": prompt},
             {'role': 'assistant', 'content': '3+3等于5'}],
            [{'role': 'system', 'content': 'You are a helpful assistant.'},
             {'role': 'user', 'content': prompt},
             {'role': 'assistant', 'content': '3+3等于5。'}],
]

'''
输入：<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>assistant\n3+3等于5<|im_end|>\n
输出：<|im_start|>------------------------------------<|im_end|>\n<|im_start|>-----------3+3等于5<|im_end|>\n
'''

from torch.nn import CrossEntropyLoss
model.train()
for i in range(100):
    batch_input_ids, batch_target_ids, batch_mask = preprocess(tokenizer, messages)
    model_outputs = model(input_ids=batch_input_ids.to(device),
                        batch_mask=batch_mask.to(device))
    output_tokens = model_outputs.logits.argmax(dim=-1)

    logits = model_outputs.logits[:, :-1, :]
    targets = batch_target_ids[:, 1:].to(device)
    # print(f'logits shape: {logits.shape}')
    # print(f'targets shape: {targets.shape}')
    
    # 计算损失
    criterion = CrossEntropyLoss()
    loss = criterion(logits.reshape(-1, logits.size(2)), targets.reshape(-1))

    # 反向传播和优化
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if i % 10 == 0:
        print(f'>> epoch: {i+1}, loss: {loss}')

>> epoch: 1, loss: 4.75
>> epoch: 11, loss: 1.0546875
>> epoch: 21, loss: 0.12451171875
>> epoch: 31, loss: 0.10791015625
>> epoch: 41, loss: 0.0927734375
>> epoch: 51, loss: 0.0908203125
>> epoch: 61, loss: 0.08349609375
>> epoch: 71, loss: 0.0830078125
>> epoch: 81, loss: 0.08251953125
>> epoch: 91, loss: 0.0751953125


In [7]:
print(chat('3+3等于几？'))
print(chat('6+6等于几？'))

3+3等于5
6+6等于12。
