In [1]:

from datasets import load_dataset
ds = load_dataset('openai/gsm8k', 'main')
dataset = ds['train']

In [6]:
SYSTEM_PROMPT = """
You are an expert mathematical reasoning assistant.  
Your task is to solve word problems by reasoning step by step before providing the final answer.  
A part of the reasoning has been provided. You need to build on that and provide the final answer.

Follow this format:  
#### reasoning  
(Provide a detailed step-by-step solution, showing intermediate calculations.)  

#### answer  
(State the final numerical answer clearly.)  
"""  

def extract_final_answer(text):
    if "####" not in text:
        return None
    return text.split("####")[1].strip()
def get_prompts(example):
    return {
        'prompt': [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"{example["question"]}\n #### Reasoning : {example['answer'].split('####')[0].strip()}"},
        ],
        'solution': extract_final_answer(example['answer'])
    }
dataset = dataset.map(get_prompts)

Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

In [7]:
dataset[0]

{'question': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?',
 'answer': 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72',
 'prompt': [{'content': '\nYou are an expert mathematical reasoning assistant.  \nYour task is to solve word problems by reasoning step by step before providing the final answer.  \n\nFollow this format:  \n#### reasoning  \n(Provide a detailed step-by-step solution, showing intermediate calculations.)  \n\n#### answer  \n(State the final numerical answer clearly.)  \n',
   'role': 'system'},
  {'content': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?\n #### Reasoning : Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogeth

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTConfig, SFTTrainer, DataCollatorForCompletionOnlyLM
model_name = 'OpenLLM-France/Lucie-7B-Instruct'
output_dir = 'sft-gsm8k'
ft_model_name = 'Lucie-7B-arithmetic'
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
)


tokenizer = AutoTokenizer.from_pretrained(model_name)
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=128,
    lora_alpha=32,
    lora_dropout=0.2,
    target_modules=["q_proj", "v_proj"]
)

model = get_peft_model(model, lora_config)

model.print_trainable_parameters()

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

trainable params: 54,525,952 || all params: 6,761,484,288 || trainable%: 0.8064


In [3]:
def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['context'])):
        text = f"### Question: {example['context'][i]}\n ### Answer: {example['completion'][i]}"
        output_texts.append(text)
    return output_texts

response_template = " ### Answer:"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

trainer = SFTTrainer(
    model,
    train_dataset=dataset,
    args=SFTConfig(output_dir=output_dir, save_steps=100000, max_steps=2000),
    formatting_func=formatting_prompts_func,
    data_collator=collator,
)

trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mhtagourti[0m ([33mhtagourti-linagora[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Step,Training Loss
500,0.1692
1000,0.0
1500,0.0
2000,0.0


TrainOutput(global_step=2000, training_loss=0.04230678769879159, metrics={'train_runtime': 306.1188, 'train_samples_per_second': 52.267, 'train_steps_per_second': 6.533, 'total_flos': 3.3670836781056e+16, 'train_loss': 0.04230678769879159, 'epoch': 1.0})