In [1]:
from IPython.display import clear_output
import json
import os
import datasets
import torch
from unsloth import FastLanguageModel

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
Unsloth: OpenAI failed to import - ignoring for now.
🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 03-26 16:42:28 __init__.py:190] Automatically detected platform cuda.


In [None]:
# Loading options. `max_seq_length` is reused by the trainer cell further down.
max_seq_length = 2048
dtype = None  # forwarded unchanged; presumably lets Unsloth choose the dtype (confirm)
load_in_4bit = True  # the checkpoint below is a bnb-4bit build

load_kwargs = dict(
    model_name="unsloth/gemma-2-2b-it-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Returns the base model together with its matching tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)

==((====))==  Unsloth 2025.3.19: Fast Gemma2 patching. Transformers: 4.50.1. vLLM: 0.7.2.
   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.151 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [None]:
# Markers that wrap a function call inside the model's response.
START_CALL_TOKEN = "<function_call>"
END_CALL_TOKEN = "</function_call>"

# Projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

lora_settings = dict(
    r=4,
    target_modules=lora_target_modules,
    lora_alpha=4,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing=True,
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# NOTE(review): `model` is rebound to its own adapter-wrapped version, so this
# cell is not idempotent. Re-run the loading cell before executing it again.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

In [None]:
# Prompt template shared by training and inference. It has two positional `{}`
# slots: the first receives the user input, the second the response (filled
# with the target answer for training, left empty when prompting the model).
# The text is a runtime string the model is fine-tuned on; keep it unchanged
# between training and inference.
system_prompt = """You are a helpful assistant.
At the moment you are capable of calling 3 functions: rgb_to_gray, mirror_image, resize_image.
You are given a question or task as input. If it indicates the function call that you are capable of, perform it by placing
the call between <function_call> and </function_call> in the response. Otherwise answer normally.

### Input:
{}

### Response:
{}
"""

EOS_TOKEN = tokenizer.eos_token # End-of-sequence string; appended to every formatted training example
def formatting_prompts_func(batch):
    """Render a batch of instruction/response pairs into training strings.

    Args:
        batch: Mapping with parallel "instruction" and "response" sequences,
            as supplied by `Dataset.map(..., batched=True)`.

    Returns:
        A dict with a single "text" key holding one string per pair: the
        module-level `system_prompt` filled with the pair, followed by
        `EOS_TOKEN`.
    """
    # EOS must terminate every example; otherwise the fine-tuned model never
    # learns where a response ends and generation will go on forever.
    texts = [
        system_prompt.format(instruction, response) + EOS_TOKEN
        for instruction, response in zip(batch["instruction"], batch["response"])
    ]
    return {"text": texts}

In [88]:
# Training mix: the local function-call examples plus a small slice of general
# instruction data, so the model keeps answering ordinary questions normally.
FUNCTION_CALLS_PATH = "/llm/data/function_calls/dataset.jsonl"
DOLLY_SAMPLE_SIZE = 150  # number of leading Dolly rows mixed in

function_calls_dataset = datasets.load_dataset(
    "json", data_files=FUNCTION_CALLS_PATH, split="train"
)

dolly15k_dataset = datasets.load_dataset("databricks/databricks-dolly-15k", split="train")
# Drop the columns the prompt template does not use. Formatting is done once,
# after concatenation, so the subset is not formatted twice and carries no
# extra "text" column into the concatenation.
dolly15k_subset = (
    dolly15k_dataset
    .select(range(DOLLY_SAMPLE_SIZE))
    .remove_columns(["category", "context"])
)

# Single formatting pass over the combined data adds the "text" column.
dataset = datasets.concatenate_datasets(
    [function_calls_dataset, dolly15k_subset]
).map(formatting_prompts_func, batched=True)
dataset

Dataset({
    features: ['instruction', 'response', 'text'],
    num_rows: 300
})

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Pick the mixed-precision mode once: bf16 when supported, otherwise fp16.
bf16_available = is_bfloat16_supported()

training_args = TrainingArguments(
    # Effective batch size is 2 x 4 = 8 sequences per optimizer step.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=60,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    weight_decay=0.01,
    optim="adamw_8bit",
    bf16=bf16_available,
    fp16=not bf16_available,
    logging_steps=1,
    seed=3407,
    output_dir="outputs",
    report_to="none",
)

# Supervised fine-tuning on the pre-formatted "text" column, without packing.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

Map (num_proc=2):   0%|          | 0/300 [00:00<?, ? examples/s]

In [90]:
# Run fine-tuning and keep the object returned by train() in `trainer_stats`.
# NOTE(review): the saved log below reports 111 total steps over 3 epochs,
# which does not match max_steps = 60 in the trainer cell, and the execution
# counts are non-sequential. The output looks stale; re-run on a fresh kernel.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 300 | Num Epochs = 3 | Total steps = 111
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 5,191,680/2,000,000,000 (0.26% trained)


Step,Training Loss
1,2.9399
2,2.8008
3,2.9363
4,2.662
5,2.466
6,2.5071
7,2.3938
8,2.123
9,1.906
10,1.7142


In [None]:
# Write the fine-tuned model and its tokenizer side by side in one directory.
adapter_dir = "lora_model"
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.model',
 'lora_model/added_tokens.json',
 'lora_model/tokenizer.json')

In [None]:
from transformers import TextStreamer
# Streams decoded text to stdout while generate() runs. With skip_prompt=False
# the prompt itself is echoed before the generated continuation.
text_streamer = TextStreamer(tokenizer, skip_prompt=False)
# Put the model into inference mode through Unsloth's helper before generating.
FastLanguageModel.for_inference(model)


In [None]:
# Function-call case: the response slot is left empty for the model to fill.
prompt = system_prompt.format("adjust the size of tiger.bmp to 1920x1080", "")
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# Output is streamed to stdout through `text_streamer` as it is generated.
outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    use_cache=True,
    streamer=text_streamer,
)

<bos>You are a helpful assistant.
At the moment you are capable of calling 3 functions: rgb_to_gray, mirror_image, resize_image.
You are given a question or task as input. If it indicates the function call that you are capable of, perform it by placing
the call between <function_call> and </function_call> in the response. Otherwise answer normally.

### Input:
adjust the size of tiger.bmp to 1920x1080

### Response:

<function_call>resize_image('tiger.bmp', 'tiger_resized.bmp', (1920, 1080))</function_call>
<eos>


In [None]:
# Control case: a task with no matching function, expected to get a plain answer.
prompt = system_prompt.format("Continue the fibonnaci sequence: 1, 1, 2, 3, 5, 8...", "")
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# Output is streamed to stdout through `text_streamer` as it is generated.
outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    use_cache=True,
    streamer=text_streamer,
)

<bos>You are a helpful assistant.
At the moment you are capable of calling 3 functions: rgb_to_gray, mirror_image, resize_image.
You are given a question or task as input. If it indicates the function call that you are capable of, perform it by placing
the call between <function_call> and </function_call> in the response. Otherwise answer normally.

### Input:
Continue the fibonnaci sequence: 1, 1, 2, 3, 5, 8...

### Response:

1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144
<eos>


In [None]:
# Function-call case phrased informally; the response slot is left empty.
prompt = system_prompt.format("turn le_gorille.png gray", "")
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# Output is streamed to stdout through `text_streamer` as it is generated.
outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    use_cache=True,
    streamer=text_streamer,
)

<bos>You are a helpful assistant.
At the moment you are capable of calling 3 functions: rgb_to_gray, mirror_image, resize_image.
You are given a question or task as input. If it indicates the function call that you are capable of, perform it by placing
the call between <function_call> and </function_call> in the response. Otherwise answer normally.

### Input:
turn le_gorille.png gray

### Response:

<function_call>rgb_to_gray('le_gorille.png', 'le_gorille_grayscale.png')</function_call>
<eos>
