In [None]:
import torch
torch.mps.empty_cache()

In [None]:
# train_grpo.py
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

dataset = load_dataset("trl-lib/tldr", split="train")

# Define the reward function, which rewards completions that are close to 20 characters
def reward_len(completions, **kwargs):
    return [-abs(20 - len(completion)) for completion in completions]

training_args = GRPOConfig(output_dir="Qwen2-0.5B-GRPO", logging_steps=10)
trainer = GRPOTrainer(
    model="Qwen/Qwen2-0.5B-Instruct",
    reward_funcs=reward_len,
    args=training_args,
    train_dataset=dataset,
)
trainer.train()

In [11]:
print("数据集列名:", dataset.column_names)
print("数据集结构:", dataset.features)

数据集列名: ['prompt', 'completion']
数据集结构: {'prompt': Value(dtype='string', id=None), 'completion': Value(dtype='string', id=None)}


In [13]:
print("\n第一个样本:")
print(dataset[0])


第一个样本:
{'prompt': "SUBREDDIT: r/relationships\n\nTITLE: I (f/22) have to figure out if I want to still know these girls or not and would hate to sound insulting\n\nPOST: Not sure if this belongs here but it's worth a try. \n\nBackstory:\nWhen I (f/22) went through my first real breakup 2 years ago because he needed space after a year of dating roand  it effected me more than I thought. It was a horrible time in my life due to living with my mother and finally having the chance to cut her out of my life. I can admit because of it was an emotional wreck and this guy was stable and didn't know how to deal with me. We ended by him avoiding for a month or so after going to a festival with my friends. When I think back I wish he just ended. So after he ended it added my depression I suffered but my friends helped me through it and I got rid of everything from him along with cutting contact. \n\nNow: Its been almost 3 years now and I've gotten better after counselling and mild anti depressan

In [14]:
print('prompt: ', dataset[0]['prompt'])
print('completion: ', dataset[0]['completion'])

prompt:  SUBREDDIT: r/relationships

TITLE: I (f/22) have to figure out if I want to still know these girls or not and would hate to sound insulting

POST: Not sure if this belongs here but it's worth a try. 

Backstory:
When I (f/22) went through my first real breakup 2 years ago because he needed space after a year of dating roand  it effected me more than I thought. It was a horrible time in my life due to living with my mother and finally having the chance to cut her out of my life. I can admit because of it was an emotional wreck and this guy was stable and didn't know how to deal with me. We ended by him avoiding for a month or so after going to a festival with my friends. When I think back I wish he just ended. So after he ended it added my depression I suffered but my friends helped me through it and I got rid of everything from him along with cutting contact. 

Now: Its been almost 3 years now and I've gotten better after counselling and mild anti depressants. My mother has be

In [15]:
print(dataset[:5])

{'prompt': ["SUBREDDIT: r/relationships\n\nTITLE: I (f/22) have to figure out if I want to still know these girls or not and would hate to sound insulting\n\nPOST: Not sure if this belongs here but it's worth a try. \n\nBackstory:\nWhen I (f/22) went through my first real breakup 2 years ago because he needed space after a year of dating roand  it effected me more than I thought. It was a horrible time in my life due to living with my mother and finally having the chance to cut her out of my life. I can admit because of it was an emotional wreck and this guy was stable and didn't know how to deal with me. We ended by him avoiding for a month or so after going to a festival with my friends. When I think back I wish he just ended. So after he ended it added my depression I suffered but my friends helped me through it and I got rid of everything from him along with cutting contact. \n\nNow: Its been almost 3 years now and I've gotten better after counselling and mild anti depressants. My 

In [17]:
from transformers import AutoTokenizer, AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")

In [23]:
input_text = '请介绍一下你自己，并且告诉我你可以做什么'
input_ids = tokenizer(input_text, return_tensors='pt')['input_ids']
output = model.generate(input_ids, max_length=100, top_p=0.95)
output = tokenizer.batch_decode(list(output))

In [24]:
output

['请介绍一下你自己，并且告诉我你可以做什么？\n\n作为人工智能助手，我没有任何个人经验、情感或能力。我被设计为帮助用户解决问题和提供信息，而不是有实际的活动或行为。如果您有任何问题需要解答，请随时提问。\n\n你是否可以理解语言？<|im_end|>']