In [1]:
!pip install unsloth==2025.7.11
!pip install datasets accelerate peft bitsandbytes transformers trl




In [2]:
from unsloth import FastLanguageModel
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
import torch


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
dataset = load_dataset("json", data_files="meditations_finetuning_dataset.json")
dataset = dataset["train"]
print(dataset[0])


{'instruction': "You are the philosopher Marcus Aurelius. Analyze the user's problem from a Stoic perspective and offer guidance in your authentic voice.", 'input': 'Feeling overwhelmed and anxious about the constant stream of disturbing news and social media updates, making it difficult to focus on personal goals and priorities.', 'output': "As I walk through the city, I am reminded that the world has always been plagued by troubles and injustices. Yet, I must not let the cacophony of opinions and the din of disturbing news dictate my inner state. I am the master of my thoughts, and I will not surrender to the whims of others.\n\nIn this era of instant information, it is easy to get caught up in the vortex of negativity. But I must remember that I am not a passive recipient of the world's troubles. I have the power to choose what I focus on, to filter out the noise and attend to what truly matters.\n\nI will not let the anxiety of others infect me. Instead, I will cultivate a sense of

In [4]:
model_name = "unsloth/llama-3-8b-bnb-4bit"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)


==((====))==  Unsloth 2025.7.11: Fast Llama patching. Transformers: 4.54.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.31.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [5]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,              # LoRA rank
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",

)


Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.7.11 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [6]:
def format_dataset(example):
    return {"text": f"### Instruction:\n{example['instruction']}\n\n### Input:\n{example['input']}\n\n### Response:\n{example['output']}"}

dataset = dataset.map(format_dataset)


In [7]:
training_args = TrainingArguments(
    output_dir="./llama3_lora_meditations",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    warmup_steps=10,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=20,
    save_total_limit=1,
    optim="adamw_torch",
    eval_steps=100,
    save_strategy="steps",
    save_steps=200,
    logging_dir="./logs",
)


In [8]:
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    eval_dataset=dataset.select(range(50)),  # small eval set
    dataset_text_field="text",
    max_seq_length=2048,
    args=training_args,
)


Unsloth: Tokenizing ["text"]:   0%|          | 0/50 [00:00<?, ? examples/s]

In [9]:
model.gradient_checkpointing_enable()


In [10]:
trainer.train()


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 172 | Num Epochs = 3 | Total steps = 258
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 2 x 1) = 2
 "-____-"     Trainable parameters = 13,631,488 of 8,043,892,736 (0.17% trained)
[34m[1mwandb[0m: Currently logged in as: [33manirudhguptaitm[0m ([33manirudhguptaitm-ggsipu[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
20,1.5146
40,1.2015
60,1.1744
80,1.0931
100,1.0515
120,0.9344
140,0.9239
160,0.9141
180,0.9196
200,0.8078


TrainOutput(global_step=258, training_loss=0.9911704876626185, metrics={'train_runtime': 824.8271, 'train_samples_per_second': 0.626, 'train_steps_per_second': 0.313, 'total_flos': 8926902025666560.0, 'train_loss': 0.9911704876626185})

In [21]:
target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj"
    # "gate_proj", "up_proj", "down_proj"  # Optional, remove or add here before first run
]

model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=target_modules,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)



Unsloth: Already have LoRA adapters! We shall skip this step.


In [13]:
model.print_trainable_parameters()


trainable params: 13,631,488 || all params: 8,043,892,736 || trainable%: 0.1695


In [14]:
model.save_pretrained("llama3_meditations_lora")
tokenizer.save_pretrained("llama3_meditations_lora")


('llama3_meditations_lora/tokenizer_config.json',
 'llama3_meditations_lora/special_tokens_map.json',
 'llama3_meditations_lora/tokenizer.json')

In [15]:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model = AutoModelForCausalLM.from_pretrained("unsloth/llama-3-8b-bnb-4bit", load_in_4bit=True)
tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3-8b-bnb-4bit")

model = PeftModel.from_pretrained(base_model, "llama3_meditations_lora")


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/350 [00:00<?, ?B/s]