SETUP AND INITIAL CONFIG

In [2]:
from unsloth import FastModel
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm
W1125 16:14:34.769000 31864 Lib\site-packages\torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
# Load the `google/gemma-3-1b-it` checkpoint and its tokenizer.
# The 4-bit switch is on and the 8-bit switch is explicitly off; sequences are
# capped at 1024 tokens.
load_options = dict(
    model_name="google/gemma-3-1b-it",
    max_seq_length=1024,
    load_in_4bit=True,
    load_in_8bit=False,
)
model, tokenizer = FastModel.from_pretrained(**load_options)

In [None]:
# Which parts of the network are fine-tuned: everything except vision layers.
trainable_parts = dict(
    finetune_vision_layers=False,
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
)

# Adapter hyper-parameters (presumably LoRA rank/alpha/dropout -- confirm
# against the Unsloth documentation).
adapter_options = dict(
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
)

# NOTE(review): `model` is rebound to the wrapped result, so re-running this
# cell passes the already-wrapped model back in. Re-run the loading cell first.
model = FastModel.get_peft_model(
    model,
    **trainable_parts,
    **adapter_options,
    use_gradient_checkpointing="unsloth",
    random_state=3047,
)

In [None]:
from datasets import load_dataset
# Pull the "main" configuration of GSM8K-Platinum; only its `test` split is
# requested, and that split is what the trainer below is given.
dataset = load_dataset(
    path="madrylab/gsm8k-platinum",
    name="main",
    split="test",
)

In [None]:
# Display the dataset object (the bare last expression becomes the cell output).
dataset

In [None]:
# Peek at the first example's question text.
dataset[0]["question"]

In [None]:
# Peek at the first example's answer field.
dataset[0]["answer"]


In [None]:
def extract_hash_answer(text):
    """Return the stripped text that follows the first ``####`` marker.

    Args:
        text: Answer string that may contain a ``####`` separator.

    Returns:
        The whitespace-stripped segment between the first ``####`` and the
        next one (or the end of the string), or ``None`` when ``text`` has no
        ``####`` at all.
    """
    if "####" in text:
        # Index 1 is the piece right after the first marker; any further
        # markers only delimit later pieces, which are ignored.
        after_marker = text.split("####", 2)[1]
        return after_marker.strip()
    return None
# Try the extractor on the first example's answer field.
extract_hash_answer(dataset[0]["answer"])

In [None]:
# Tags the model is asked to emit. NOTE: the same tag is used to open and to
# close each section (there is no separate closing form); the format regex and
# reward functions in this notebook rely on each tag appearing exactly twice.
reasoning = "<reasoning>"
answer = "<answer>"

# System message, assembled one sentence per line.
system_prompt = "\n".join([
    "You are given a problem.",
    "Think about the problem and provide your working out.",
    f"Place it between {reasoning} and {reasoning}.",
    f"Then, provide your solution between {answer}{answer}.",
])


In [None]:
def _to_grpo_example(x):
    """Build the chat-style prompt and the bare reference answer for one row.

    The cell rebinds `dataset` to its own mapped output, so it must be safe to
    run more than once. On a second run `x["answer"]` is already the extracted
    value and contains no "####" marker; the existing value is then kept
    instead of being overwritten with None.

    Args:
        x: One dataset row with "question" and "answer" fields.

    Returns:
        A dict with the "prompt" message list (system + user) and "answer".
    """
    raw_answer = x["answer"]
    final_answer = extract_hash_answer(raw_answer)
    return {
        "prompt" : [
            {"role": "system", "content": system_prompt},
            {"role": "user",   "content": x["question"]},
        ],
        # No marker found -> already extracted (or marker-less): keep as is.
        "answer": raw_answer if final_answer is None else final_answer,
    }


dataset = dataset.map(_to_grpo_example)

In [None]:
# Inspect the first row after mapping (prompt messages plus extracted answer).
dataset[0]

In [None]:
import re

# Full-completion format check, assembled from four fragments:
#   1. optional leading whitespace,
#   2. a reasoning section delimited by two `reasoning` tags,
#   3. an answer section delimited by two `answer` tags (its content is the
#      single capture group),
#   4. optional trailing whitespace.
_format_fragments = [
    rf"^[\s]{{0,}}",
    rf"{reasoning}.+?{reasoning}.*?",
    rf"{answer}(.+?){answer}",
    rf"[\s]{{0,}}$",
]

# DOTALL lets `.` cross newlines; MULTILINE lets ^/$ anchor at line boundaries.
match_format = re.compile(
    "".join(_format_fragments),
    flags=re.DOTALL | re.MULTILINE,
)

In [None]:
# Sanity check: a well-formed sample should yield a match object, not None.
match_format.search("<reasoning> Here is my working out<reasoning>"\
                    "<answer>42 <answer>")

In [None]:
def match_format_exactly(completions, **kwargs):
    """Reward completions that follow the full expected format.

    Args:
        completions: Sequence of completions; each is indexed as
            ``completion[0]["content"]`` to obtain the generated text.
        **kwargs: Accepted and ignored.

    Returns:
        One score per completion: 3 when `match_format` finds a match in the
        text, otherwise 0.
    """
    return [
        3 if match_format.search(candidate[0]["content"]) is not None else 0
        for candidate in completions
    ]

In [None]:
def match_format_partially(completions, **kwargs):
    """Give partial credit for each tag that appears exactly twice.

    Args:
        completions: Sequence of completions; each is indexed as
            ``completion[0]["content"]`` to obtain the generated text.
        **kwargs: Accepted and ignored.

    Returns:
        One score per completion: +1 for each of the `reasoning` and `answer`
        tags that occurs exactly twice, -1 for each that does not (so the
        result is -2, 0 or 2).
    """
    def tag_score(text, tag):
        # Exactly two occurrences = one opening plus one closing use.
        return 1 if text.count(tag) == 2 else -1

    scores = []
    for candidate in completions:
        text = candidate[0]["content"]
        scores.append(tag_score(text, reasoning) + tag_score(text, answer))
    return scores

In [None]:
def check_answer_correctness(prompts, completions, answer, **kwargs):
    """Score the answer extracted from each completion against its reference.

    The answer is taken from the capture group of `match_format`, so only
    completions that follow the full format can earn (or lose) points here.

    Args:
        prompts: Unused; kept so the reward-function signature is unchanged.
        completions: Sequence of completions; each is indexed as
            ``completion[0]["content"]`` to obtain the generated text.
        answer: Reference answers, paired position-wise with `completions`
            (presumably supplied from the dataset's "answer" column -- confirm
            against the trainer documentation).
        **kwargs: Accepted and ignored.

    Returns:
        One score per (completion, reference) pair:
          *  0    format did not match, or the reference is None
          *  3    exact string match
          *  1.5  match after stripping surrounding whitespace
          *  0.5  numeric ratio guess/reference within [0.9, 1.1]
          *  0.25 numeric ratio within [0.8, 1.2]
          * -1    anything else (including non-numeric or zero reference)
    """
    responses = [output[0]["content"] for output in completions]

    extracted_responses = [
        found.group(1) if (found := match_format.search(r)) is not None else None
        for r in responses
    ]

    scores = []
    for guess, true_answer in zip(extracted_responses, answer):
        # Nothing to compare: no extracted answer, or no reference answer.
        # (A None reference previously crashed on `.strip()`.)
        if guess is None or true_answer is None:
            scores.append(0)
            continue

        if guess == true_answer:
            score = 3
        elif guess.strip() == true_answer.strip():
            score = 1.5
        else:
            try:
                ratio = float(guess) / float(true_answer)
            except (ValueError, ZeroDivisionError):
                # Non-numeric text or a reference of zero: count as wrong.
                # Only these are caught so KeyboardInterrupt still propagates.
                score = -1
            else:
                if 0.9 <= ratio <= 1.1:
                    score = 0.5
                elif 0.8 <= ratio <= 1.2:
                    score = 0.25
                else:
                    score = -1
        scores.append(score)
    return scores

In [None]:
# Looser extractor: after the first `answer` tag, lazily skip any text
# (DOTALL lets `.` cross newlines) and capture the first run of digits/dots.
# The character class lists `\d` twice, which is redundant but harmless.
# NOTE(review): the class has no "-" or ",", so "-5" captures "5" and "1,000"
# captures "1"; a lone "." or "1.2.3" is also captured and later fails float().
match_numbers = re.compile(
    rf"{answer}.*?([\d\d.]{{1,}})",
    flags= re.MULTILINE | re.DOTALL
)

In [None]:
# Sanity check: the number is found despite surrounding prose; expect ['42.0'].
match_numbers.findall("<answer> The answer is 42.0 <answer>")

In [None]:
def check_numbers(prompts, completions, answer, **kwargs):
    """Reward completions whose first number after the answer tag is correct.

    Uses `match_numbers`, so it can give credit even when the strict format
    check fails. Also prints the first reference/extracted pair of the batch
    as a progress log.

    Args:
        prompts: Unused; kept so the reward-function signature is unchanged.
        completions: Sequence of completions; each is indexed as
            ``completion[0]["content"]`` to obtain the generated text.
        answer: Reference answers, paired position-wise with `completions`.
        **kwargs: Accepted and ignored.

    Returns:
        One score per pair: 1.5 when both values parse as floats and are
        equal; 0 when no number was extracted; 0.0 otherwise.
    """
    responses = [output[0]["content"] for output in completions]

    extracted_responses = [
        found.group(1) if (found := match_numbers.search(r)) is not None else None
        for r in responses
    ]

    # Log the first pair only; skipped for an empty batch instead of raising.
    if answer and extracted_responses:
        print("*"*5, f"\nAnswer: {answer[0]}", f"\nExtracted: {extracted_responses[0]}")

    scores = []
    for guess, true_answer in zip(extracted_responses, answer):
        if guess is None:
            scores.append(0)
            continue
        try:
            is_exact = float(guess) == float(true_answer)
        except (TypeError, ValueError):
            # Unparseable text ("1.2.3", ".") or a None reference: no reward.
            # Only these are caught so KeyboardInterrupt still propagates.
            scores.append(0.0)
            continue
        scores.append(1.5 if is_exact else 0.0)
    return scores


TRAINING 

In [None]:
from trl import GRPOConfig, GRPOTrainer

In [None]:
# Optimiser and learning-rate schedule.
optimizer_settings = dict(
    learning_rate=5e-6,
    adam_beta1=0.9,
    adam_beta2=0.99,
    weight_decay=0.1,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    optim="adamw_8bit",
    max_grad_norm=0.1,
)

# Batching and generation. Prompt (256) + completion (768) add up to the
# 1024-token max_seq_length the model was loaded with.
rollout_settings = dict(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_generations=8,
    max_prompt_length=256,
    max_completion_length=768,
)

# Run length, logging and checkpointing.
run_settings = dict(
    logging_steps=1,
    max_steps=50,
    save_steps=50,
    report_to="wandb",
    output_dir="outputs",
)

training_args = GRPOConfig(
    **optimizer_settings,
    **rollout_settings,
    **run_settings,
)

In [None]:
# Reward functions handed to the trainer, in this order: two format checks
# followed by two answer checks.
reward_functions = [
    match_format_exactly,
    match_format_partially,
    check_answer_correctness,
    check_numbers,
]

trainer = GRPOTrainer(
    model=model,
    processing_class=tokenizer,
    reward_funcs=reward_functions,
    args=training_args,
    train_dataset=dataset,
)

In [None]:
# Run training with the configuration above (max_steps=50).
trainer.train()

In [None]:
# Model and tokenizer are saved side by side so that one location holds
# everything needed to reload. The tokenizer used to be pushed to a different
# Hub repo ("Romaiz_Dabeer/gemma-3"), leaving the model repo without one.
local_dir = "gemma-3-1b-finetuned"
hub_repo = "Romaiz_Dabeer/gemma-3-1b-finetuned-unsloth"

model.save_pretrained(local_dir)  # Local saving
tokenizer.save_pretrained(local_dir)  # Local saving
model.push_to_hub(hub_repo) # Online saving
tokenizer.push_to_hub(hub_repo) # Online saving

In [None]:
# Save a merged copy locally, together with the tokenizer (presumably the
# adapters folded into the base weights -- confirm in the Unsloth docs).
model.save_pretrained_merged("gemma-3-1b-finetuned-unsloth", tokenizer)

In [None]:
# Upload the merged model and tokenizer to the given Hub repo.
model.push_to_hub_merged(
        "Romaiz_Dabeer/gemma-3-finetune-finetuned-unsloth", tokenizer
    )

In [None]:
import os

# Upload a GGUF export to the Hub.
# The access token is read from the HF_TOKEN environment variable so that no
# credential is stored in the notebook. If it is unset, None is passed
# (presumably the library then falls back to a cached login -- confirm).
# TODO: replace the "HF_ACCOUNT" placeholder with the real account name.
model.push_to_hub_gguf(
        "HF_ACCOUNT/gemma-finetune-gguf",
        tokenizer,
        quantization_method = "Q8_0", # Only Q8_0, BF16, F16 supported
        token = os.environ.get("HF_TOKEN"),
    )

In [3]:
# Ask PyTorch to release cached GPU memory it is no longer using.
torch.cuda.empty_cache()