In [None]:
from datasets import load_dataset, concatenate_datasets
# Load the 'train' split of each dolphin-r1 configuration, in the order
# non-reasoning, reasoning-deepseek, reasoning-flash.
ds_nr, ds_r1, ds_fl = (
    load_dataset('cognitivecomputations/dolphin-r1', config_name)['train']
    for config_name in ('nonreasoning', 'reasoning-deepseek', 'reasoning-flash')
)

In [None]:
def get_prompts_r1(example):
    """Append the row's reasoning and answer as a final assistant message.

    Args:
        example: A mapping with a ``messages`` sequence of chat-turn dicts plus
            ``reasoning`` and ``answer`` fields.

    Returns:
        A dict with a single ``messages`` key holding a new list: the original
        turns followed by one assistant turn whose content is formatted as
        ``#### Reasoning : ...`` and ``#### Answer : ...`` on separate lines.
    """
    assistant_turn = {
        "role": "assistant",
        "content": f"#### Reasoning : {example['reasoning']}\n#### Answer : {example['answer']}",
    }
    # Build a fresh list rather than appending in place, so the caller's row
    # is not modified and calling this twice on the same row gives the same result.
    return {"messages": [*example['messages'], assistant_turn]}
# For both reasoning datasets: fold reasoning/answer into the chat turns via
# get_prompts_r1, then drop the 'reasoning', 'answer' and 'model' columns.
ds_r1, ds_fl = (
    reasoning_ds.map(get_prompts_r1).remove_columns(['reasoning', 'answer','model'])
    for reasoning_ds in (ds_r1, ds_fl)
)

# Drop the listed columns from the non-reasoning dataset and cast it to the
# feature schema of ds_r1 (presumably so all three can be concatenated).
ds_nr = (
    ds_nr
    .remove_columns(['score','refusal','compliance_rating','overall_quality'])
    .cast(ds_r1.features)
)

In [None]:
# Single training set: non-reasoning rows first, then the two reasoning sets.
dataset = concatenate_datasets(
    [
        ds_nr,
        ds_r1,
        ds_fl,
    ]
)

In [None]:
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer

# Base checkpoint, output directory, and a name for the fine-tuned model.
model_name = 'OpenLLM-France/Lucie-7B-Instruct'
output_dir = '/mnt/disk/dolphin-r1'
ft_model_name = 'Lucie-7B-dolphin-r1'

# Tokenizer for the base checkpoint, configured to pad on the right.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.padding_side = "right"

# LoRA adapter settings applied to the listed projection modules.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=128,
    lora_alpha=32,
    lora_dropout=0.2,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

# Load the base model and wrap it with the LoRA adapters in one step.
model = get_peft_model(
    AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
    ),
    lora_config,
)

model.print_trainable_parameters()

In [None]:
from trl import SFTConfig, SFTTrainer

# Supervised fine-tuning of the LoRA-wrapped model on the combined dataset,
# with packing enabled and metrics reported to Weights & Biases.
trainer = SFTTrainer(
    model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    args=SFTConfig(
        output_dir=output_dir,
        save_steps=100000,
        packing=True,
        logging_steps=50,
        # Fixed: the previous value had a stray leading apostrophe inside the
        # string ("'sft-gsm8k"), which ended up in the reported run name.
        # NOTE(review): the name still says "gsm8k" although this run trains
        # on dolphin-r1 — confirm the intended run name.
        run_name="sft-gsm8k",
        report_to=["wandb"],
    ),
)

trainer.train()