In [4]:
from unsloth import FastLanguageModel
import torch
import pandas as pd
# Environment check: report the CUDA setup PyTorch can see before any model is loaded.
cuda_available = torch.cuda.is_available()
# Only ask for the device name when a GPU is present; otherwise show a placeholder.
gpu_name = torch.cuda.get_device_name(0) if cuda_available else 'N/A'

print(f"CUDA available: {cuda_available}")
print(f"CUDA version: {torch.version.cuda}")
print(f"GPU count: {torch.cuda.device_count()}")
print(f"GPU name: {gpu_name}")

CUDA available: True
CUDA version: 12.1
GPU count: 1
GPU name: NVIDIA GeForce RTX 2070


In [3]:
# Sequence-length limit handed to the loader; the same value is reused when the trainer is built.
max_seq_length = 2048
# NOTE(review): None presumably lets Unsloth pick the dtype automatically — confirm against the Unsloth docs.
dtype = None
# Forwarded as load_in_4bit to from_pretrained below.
load_in_4bit = True


# Load the base Llama 3.1 8B checkpoint and its tokenizer with the settings above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit
)

==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    GPU: NVIDIA GeForce RTX 2070. Max memory: 8.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [6]:
# Projection layers that receive LoRA adapters (attention q/k/v/o plus the MLP projections).
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# NOTE: this cell rebinds `model`, so running it twice feeds the already-adapted
# model back into get_peft_model — reload the base model before re-running.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

Unsloth 2025.2.15 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
# Prompt template with three named placeholders — {instruction}, {input}, {response} —
# filled via str.format in formatting_prompts_func.
# The template ends with a newline after {response}, so the EOS token appended by
# formatting_prompts_func lands on its own line.
# NOTE(review): this cell has no execution count, and the recorded dataset preview shows the
# EOS token directly after "### Response:\n" — the preview was likely produced by a different
# version of this template. Restart the kernel and run all cells to confirm.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input}

### Response:
{response}
"""

In [9]:
# End-of-sequence marker taken from the tokenizer; formatting_prompts_func appends it to every example.
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    """Render a batch of Alpaca-style records into training texts.

    Args:
        examples: Column-oriented batch (the shape ``dataset.map(..., batched=True)``
            passes in) with equal-length ``'instruction'``, ``'input'`` and
            ``'output'`` sequences.

    Returns:
        ``{"text": [...]}`` with one formatted prompt per record, each followed
        by ``EOS_TOKEN``.
    """
    # Plural, distinct names: the original rebound these columns as loop variables
    # and shadowed the builtin `input`.
    instructions = examples['instruction']  # prompts
    contexts = examples['input']            # optional extra context
    responses = examples['output']          # targets

    texts = [
        alpaca_prompt.format(
            instruction=instruction,
            # A missing context (JSON null -> None) must render as empty text,
            # not as the literal string "None".
            input="" if context is None else context,
            response=response,
        ) + EOS_TOKEN
        for instruction, context, response in zip(instructions, contexts, responses)
    ]
    return {"text": texts}

from datasets import load_dataset
# Fetch the Alpaca-format Python dataset from the Hugging Face Hub, then add a
# "text" column holding the fully formatted prompt for every record.
dataset = load_dataset(
    "json",
    data_files="hf://datasets/Vezora/Tested-143k-Python-Alpaca/143k-Tested-Python-Alpaca-Vezora.json",
    split="train",
).map(formatting_prompts_func, batched=True)

Generating train split: 143327 examples [00:02, 60254.07 examples/s]
Map: 100%|██████████| 143327/143327 [00:01<00:00, 89275.64 examples/s]


In [10]:
# Preview only the first two formatted prompts; displaying the whole column would
# materialise and print every row of the dataset.
dataset[:2]['text']

['Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nHow can I add a function to find the maximum number among a given set of numbers?\n\n### Input:\n\n\n### Response:\n<|end_of_text|>',
 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nDevise a Python function that can detect and eliminate every instance of a particular character from a provided string, preserving the order of the remaining characters. This function should be capable of handling strings embedded within lists, dictionaries, sets, and tuples, and eradicate all instances of the designated character in these embedded strings. It should be able to manage multiple layers of nested strings and other data types such as integers, floats, and custom objects. If a string rep

In [12]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bf16_supported

# Mixed-precision mode: bf16 when the GPU supports it, fp16 otherwise.
use_bf16 = is_bf16_supported()

# Optimisation settings for a short 60-step run.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # 2 per device * 4 accumulation steps = 8 examples per update
    warmup_steps=5,
    # num_train_epochs=1,  # set this for 1 full training run.
    max_steps=60,
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type='linear',
    seed=3407,
    output_dir="outputs",
)

# Supervised fine-tuning over the pre-formatted "text" column of the dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=1,
    packing=False,  # can make 5x training faster for short sequences
    args=training_args,
)

Converting train dataset to ChatML: 100%|██████████| 143327/143327 [00:11<00:00, 13017.80 examples/s]
Applying chat template to train dataset: 100%|██████████| 143327/143327 [00:08<00:00, 16811.71 examples/s]
Tokenizing train dataset: 100%|██████████| 143327/143327 [01:44<00:00, 1374.45 examples/s]
Truncating train dataset: 100%|██████████| 143327/143327 [00:48<00:00, 2925.88 examples/s]


In [13]:
# Run fine-tuning with the trainer built in the previous cell; the return value is stored in trainer_stats.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 143,327 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,0.4623
2,0.455
3,0.3429
4,0.2578
5,0.1043
6,0.0719
7,0.0648
8,0.0732
9,0.0398
10,0.0508


In [1]:
# Save the model and the tokenizer into the "model" directory.
# NOTE: this cell needs `model` and `tokenizer` from the loading/training cells in the same
# kernel session. The recorded NameError comes from running it (execution count 1) in a
# kernel where `model` had not been defined — run the notebook top to bottom first.
model.save_pretrained("model", safe_serialization=True)
tokenizer.save_pretrained("model")

NameError: name 'model' is not defined