In [1]:
from datasets import load_dataset, concatenate_datasets

# Configurations of the EleutherAI/arithmetic dataset that get merged below
subsets = [
    "arithmetic_1dc",
    "arithmetic_2da",
    "arithmetic_2dm",
    "arithmetic_2ds",
    "arithmetic_3da",
    "arithmetic_3ds",
    "arithmetic_4da",
    "arithmetic_4ds",
    "arithmetic_5da",
    "arithmetic_5ds",
]

# Pull the "validation" split of every configuration, keeping the order above
datasets_list = []
for subset in subsets:
    subset_splits = load_dataset("EleutherAI/arithmetic", subset)
    datasets_list.append(subset_splits["validation"])

# Stack the per-subset splits into a single dataset used for fine-tuning
dataset = concatenate_datasets(datasets_list)

# Report how many examples the merged dataset contains
print(f"Total examples: {len(dataset)}")

Total examples: 20000


In [5]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer, DataCollatorForCompletionOnlyLM
# Hub id of the base checkpoint, trainer output directory, and the name
# reserved for the fine-tuned model
model_name = 'OpenLLM-France/Lucie-7B-Instruct'
output_dir = 'sft'
ft_model_name = 'Lucie-7B-arithmetic'

# "auto" defers both the weight dtype and the device placement to the library
model_load_kwargs = {
    "torch_dtype": "auto",
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)

# Tokenizer is loaded from the same checkpoint as the model
tokenizer = AutoTokenizer.from_pretrained(model_name)
from peft import LoraConfig, get_peft_model

# LoRA hyperparameters: adapters are attached only to the modules named
# "q_proj" and "v_proj"; every other weight of the base model stays frozen.
lora_settings = {
    "task_type": "CAUSAL_LM",
    "r": 128,            # rank of the low-rank update matrices
    "lora_alpha": 32,    # scaling numerator applied to the adapter output
    "lora_dropout": 0.2,
    "target_modules": ["q_proj", "v_proj"],
}
lora_config = LoraConfig(**lora_settings)

# Wrap the base model; `model` now refers to the PEFT-wrapped model
model = get_peft_model(model, lora_config)

# Print the trainable vs. total parameter counts as a sanity check
model.print_trainable_parameters()

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

trainable params: 54,525,952 || all params: 6,761,484,288 || trainable%: 0.8064


In [7]:
def formatting_prompts_func(example):
    """Render a batch of examples as training strings.

    Args:
        example: mapping with parallel sequences under the keys
            'context' (the question) and 'completion' (the answer).

    Returns:
        A list with one "### Question: ...\\n ### Answer: ..." string per
        entry of example['context'], in the same order.
    """
    questions = example['context']
    # The completion is looked up by position so that a shorter 'completion'
    # sequence still raises IndexError instead of being silently truncated.
    return [
        f"### Question: {question}\n ### Answer: {example['completion'][idx]}"
        for idx, question in enumerate(questions)
    ]

# Marker that separates prompt from answer in the formatted text; it must match
# the " ### Answer:" fragment produced by formatting_prompts_func. Per the TRL
# docs, the completion-only collator uses it to restrict the loss to the tokens
# that follow the marker.
response_template = " ### Answer:"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

# NOTE(review): save_steps=10000 is larger than the 7500 total steps reported by
# this run, so no intermediate checkpoint appears to be written -- confirm the
# fine-tuned weights are saved explicitly elsewhere.
training_args = SFTConfig(output_dir=output_dir, save_steps=10000)

trainer = SFTTrainer(
    model,
    args=training_args,
    train_dataset=dataset,
    formatting_func=formatting_prompts_func,
    data_collator=collator,
)

# Kept as the final expression so the notebook displays the TrainOutput summary
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mhtagourti[0m ([33mhtagourti-linagora[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Step,Training Loss
500,0.176
1000,0.0782
1500,0.0696
2000,0.0574
2500,0.0542
3000,0.0389
3500,0.0423
4000,0.0387
4500,0.0406
5000,0.0387


TrainOutput(global_step=7500, training_loss=0.05066116956075033, metrics={'train_runtime': 1103.0932, 'train_samples_per_second': 54.393, 'train_steps_per_second': 6.799, 'total_flos': 8.872452552012595e+16, 'train_loss': 0.05066116956075033, 'epoch': 3.0})