In [2]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-1B-Instruct",
    max_seq_length=max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


    PyTorch 2.6.0+cu124 with CUDA 1204 (you have 2.6.0+cu118)
    Python  3.10.11 (you have 3.10.0)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


🦥 Unsloth Zoo will now patch everything to make training faster!


  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"cuda:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.3.18: Fast Llama patching. Transformers: 4.50.0.
   \\   /|    NVIDIA GeForce GTX 1660 Ti. Num GPUs = 1. Max memory: 6.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.6.0+cu118. CUDA: 7.5. CUDA Toolkit: 11.8. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    lora_alpha=32,
    use_rslora=True,
    lora_dropout=0,
    bias = "none",
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
    use_gradient_checkpointing="unsloth",
    random_state = 32,
    loftq_config = None,
)

Unsloth 2025.3.18 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


In [4]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1"
)

def formatting_prompts_func(examples):
    user_prompts = [{"role": "user", "content": text} for text in examples["text"]]
    assistant_outputs = [{"role": "assistant", "content": claim} for claim in examples["claim"]]

    formatted_prompts = [
        [user_prompt, assistant_output]
        for user_prompt, assistant_output in zip(user_prompts, assistant_outputs)
    ]

    formatted_conversations = [
        tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
        for convo in formatted_prompts
    ]

    return {"text": formatted_conversations}


In [5]:
from datasets import load_dataset

dataset = load_dataset('csv', data_files={'train': 'data/reduced-train-eng.csv'}, delimiter=',')

In [6]:
from unsloth.chat_templates import standardize_sharegpt
dataset = standardize_sharegpt(dataset["train"])
dataset = dataset.map(formatting_prompts_func, batched= True,)

In [7]:
print(dataset[5]["text"])

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

I'm Cadbury's Managing Director Eoin Kellett, we have a little surprise this year for Halloween for everyone who shares+comments. Cadbury will send each person who does this a Cadbury Halloween hamper<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Cadbury is offering to send each person who shares social media content a chocolate hamper for Halloween.<|eot_id|>


In [8]:
import unsloth
import torch
from trl import SFTTrainer
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel
from datasets import Dataset
from unsloth import is_bfloat16_supported

trainer=SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=1,
    packing=True,
    args=TrainingArguments(
        learning_rate=1e-4,
        lr_scheduler_type="linear",
        per_device_train_batch_size=4,
        gradient_accumulation_steps=8,
        num_train_epochs=1,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        output_dir="output",
        seed=0,
    ),
)





In [None]:
from unsloth.chat_templates import train_on_responses_only

#trainer = train_on_responses_only(
#    trainer,
#    instruction_part="<|start_header_id|>user<|end_header_id|>",
#    response_part="<|start_header_id|>assistant<|end_header_id|>"
#)

In [None]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 50 | Num Epochs = 1 | Total steps = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 8 x 1) = 32
 "-____-"     Trainable parameters = 22,544,384/1,000,000,000 (2.25% trained)


In [64]:
model.save_pretrained("model/1B_finetuned_llama3.2")
tokenizer.save_pretrained("model/1B_finetuned_llama3.2")

('model/1B_finetuned_llama3.2\\tokenizer_config.json',
 'model/1B_finetuned_llama3.2\\special_tokens_map.json',
 'model/1B_finetuned_llama3.2\\tokenizer.json')

In [65]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template= "llama-3.1"
)

FastLanguageModel.for_inference(model)

messages = [
    {"role": "system", "content": "You are an AI assistant designed to extract claims from a given passage of text. Keep it short and return the claim in the text. Only return the big idea and exclude unneeded details."},
    {"role": "user", "content": "Money is the root of all evil. One time my friend abandoned me for quick buck. Do you think that's weird at all? I don't even know."}
]

inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt = True,
    return_tensors = "pt",
).to("cuda")

outputs = model.generate(input_ids = inputs, max_new_tokens = 64, use_cache = True, temperature = 0.7)

tokenizer.batch_decode(outputs, skip_special_tokens=True)

["system\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\nYou are an AI assistant designed to extract claims from a given passage of text. Keep it short and return the claim in the text. Only return the big idea and exclude unneeded details.user\n\nMoney is the root of all evil. One time my friend abandoned me for quick buck. Do you think that's weird at all? I don't even know.assistant\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\nCutting Knowledge Date: December 2023\nToday Date: 26 July 2024\n\nCutting Knowledge Date"]