In [None]:
from unsloth import FastModel

model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/gemma-3-4b-it",
    max_seq_length = 2048,
    load_in_4bit = True,
    load_in_8bit = False,
    full_finetuning = False,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.16: Fast Gemma3 patching. Transformers: 4.50.0.dev0.
   \\   /|    NVIDIA GeForce RTX 4090. Num GPUs = 1. Max memory: 23.533 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using bfloat16 full finetuning which cuts memory usage by 50%.


Downloading shards: 100%|██████████| 2/2 [01:16<00:00, 38.48s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.26it/s]
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [2]:
model = FastModel.get_peft_model(
    model,
    finetune_vision_layers     = False, # Turn off for just text!
    finetune_language_layers   = True,  # Should leave on!
    finetune_attention_modules = True,  # Attention good for GRPO
    finetune_mlp_modules       = True,  # SHould leave on always!

    r = 8,           # Larger = higher accuracy, but might overfit
    lora_alpha = 8,  # Recommended alpha == r at least
    lora_dropout = 0,
    bias = "none",
    random_state = 3407,
)

Unsloth: Full finetuning is enabled, so .get_peft_model has no effect


In [1]:
from datasets import load_dataset

# 加載原始數據集
dataset = load_dataset("hesamation/git-prompt")
original_split = list(dataset.keys())[0]
split_dataset = dataset[original_split].train_test_split(
    test_size=0.3,
    seed=42
)

temp_test = split_dataset["test"].train_test_split(
    test_size=2/3,
    seed=42
)

final_dataset = {
    "train": split_dataset["train"],
    "validation": temp_test["train"],
    "test": temp_test["test"]
}

# 驗證比例是否正確
total_samples = len(dataset[original_split])
print(f"總樣本數: {total_samples}")
print(f"訓練集: {len(final_dataset['train'])} 樣本 ({len(final_dataset['train'])/total_samples:.1%})")
print(f"驗證集: {len(final_dataset['validation'])} 樣本 ({len(final_dataset['validation'])/total_samples:.1%})")
print(f"測試集: {len(final_dataset['test'])} 樣本 ({len(final_dataset['test'])/total_samples:.1%})")

  from .autonotebook import tqdm as notebook_tqdm


總樣本數: 322
訓練集: 225 樣本 (69.9%)
驗證集: 32 樣本 (9.9%)
測試集: 65 樣本 (20.2%)


In [4]:
from datasets import load_dataset
from unsloth.chat_templates import standardize_data_formats
dataset = load_dataset("hesamation/git-prompt", split="train") 
dataset = standardize_data_formats(dataset)

def apply_chat_template(examples):
    formatted_texts = []
    for instruction, response in zip(examples["instruction"], examples["response"]):
        formatted_text = f"<bos><start_of_turn>user\n{instruction}<end_of_turn>\n<start_of_turn>model\n{response}<end_of_turn>"
        formatted_texts.append(formatted_text)
    return {"text": formatted_texts}

dataset = dataset.map(apply_chat_template, batched = True)

In [5]:
print(dataset[0]["text"])

<bos><start_of_turn>user
Create a new branch for my feature<end_of_turn>
<start_of_turn>model
To create a new branch for your feature, use the following Git command:

`git branch feature/your-feature-name`

Replace `your-feature-name` with a descriptive name for your feature. This will create a new branch, but you'll still be on the current branch.

To switch to the new branch, use:

`git checkout feature/your-feature-name`

Alternatively, you can create and switch to the new branch in one step with:

`git checkout -b feature/your-feature-name`<end_of_turn>


In [8]:
from trl import SFTTrainer, SFTConfig
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    eval_dataset = None,
    args = SFTConfig(
        dataset_text_field = "text",
        bf16=True,
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4, # Use GA to mimic batch size!
        warmup_steps = 5,
        num_train_epochs = 1, # Set this for 1 full training run.
        # max_steps = 30,
        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        report_to = "none", # Use this for WandB etc
    ),
)

Unsloth: We found double BOS tokens - we shall remove one automatically.


Unsloth: Tokenizing ["text"] (num_proc=12): 100%|██████████| 322/322 [00:05<00:00, 54.61 examples/s]


In [9]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 322 | Num Epochs = 1 | Total steps = 40
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 4,300,079,472/4,300,079,472 (100.00% trained)


TypeError: unsupported operand type(s) for /: 'Tensor' and 'NoneType'

In [None]:
from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "gemma-3",
)
formatted_text = f"<bos><start_of_turn>user\nCreate a new branch for my feature<end_of_turn><start_of_turn>model\n"
from transformers import TextStreamer
_ = model.generate(
    **tokenizer(formatted_text, return_tensors = "pt").to("cuda"),
    max_new_tokens = 128,
    temperature = 1.0, top_p = 0.95, top_k = 64,
    streamer = TextStreamer(tokenizer, skip_prompt = True),
)

<bos><start_of_turn>user
Create a new branch for my feature<end_of_turn><start_of_turn>model
To create a new branch for your feature, you can use the following Git command:

`git branch feature/new-feature-name`

Replace `feature/new-feature-name` with the actual name of your feature branch.

After creating the branch, you can switch to it using:

`git checkout feature/new-feature-name`

Alternatively, you can use the `-b` option with `git checkout` to create and switch to the new branch in one step:

`git checkout -b feature/new-feature-name`<end_of_turn>


In [None]:
# model.save_pretrained("gemma-3")  # Local saving
# tokenizer.save_pretrained("gemma-3")