In [1]:
%%capture
%pip install unsloth
%pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

%pip install wandb

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()

from huggingface_hub import login

!wandb login os.getenv('WANDB_API')
login(os.getenv('HUGGINGFACE_TOKEN'))

zsh:1: unknown file attribute: B


### Load the model

In [4]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "meta-llama/Llama-3.2-1B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.14: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


### Create trainable LoRA params

In [8]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

Unsloth 2025.3.14 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


### Dataset preparation

In [9]:
from datasets import load_dataset
train_dataset = load_dataset("json", data_files="data/train_dataset.json", split="train")
eval_dataset = load_dataset("json", data_files="data/eval_dataset.json", split="train")

In [10]:
print(train_dataset.column_names)
print(eval_dataset.column_names)

['input', 'output']
['input', 'output']


In [11]:
from src.utils import generate_prompt, generate_and_tokenize_prompt
generate_prompt(train_dataset[0], train=True)

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are a name and surname extractor. Your task is to identify and extract the names and surnames of characters from the given text.\n### Input:\nIn the small town of Maplewood, Emma Kensington stumbled upon an old, dusty book while exploring her grandmother\'s attic. Intrigued, she dusted it off to reveal the title: "The Secrets of Eldridge Manor."\n\nHer best friend, Lucas Thompson, peeked over her shoulder, his curiosity piqued. "Is that a ghost story? You have to read it!"\n\n“As long as you’re ready for a late-night horror marathon!” Emma replied, teasing him. They both chuckled, remembering the last time they had tried to binge-watch scary movies.\n\nThat evening, the two gathered under a blanket fortress in Emma’s living room, the dusty book sprawled in front of them. As Emma began to read aloud, the surname “Eldridge” brought shivers of nostalgia. Her grandmother often spoke of the eldritch legacy surrounding Eldridg

In [16]:
train_dataset = train_dataset.map(lambda f: generate_and_tokenize_prompt(f, tokenizer=tokenizer))
train_dataset = eval_dataset.map(lambda f: generate_and_tokenize_prompt(f, tokenizer=tokenizer))

Map:   0%|          | 0/276 [00:00<?, ? examples/s]

Map:   0%|          | 0/41 [00:00<?, ? examples/s]

In [17]:
train_dataset

Dataset({
    features: ['input', 'output', 'input_ids', 'attention_mask'],
    num_rows: 41
})

### Setting the training hyperparameters

In [10]:
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

training_args = TrainingArguments(
    per_device_train_batch_size = 4,
    per_device_eval_batch_size = 4,
    gradient_accumulation_steps = 4,
    logging_strategy="epoch",
    evaluation_strategy="epoch",
    warmup_steps = 4,
    num_train_epochs=15,
    learning_rate = 1e-4,
    fp16 = not is_bfloat16_supported(),
    bf16 = is_bfloat16_supported(),
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 42,
    output_dir = "outputs",
    report_to = "wandb",
)



In [11]:
from trl import SFTTrainer

trainer = SFTTrainer(
    model = model,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,
    args = training_args,
)



In [12]:
trainer.train_dataset

Dataset({
    features: ['input', 'output', 'input_ids', 'attention_mask'],
    num_rows: 276
})

In [13]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.748 GB.
1.088 GB of memory reserved.


### Running training

In [14]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 276 | Num Epochs = 20 | Total steps = 340
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 11,272,192/785,713,152 (1.43% trained)
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mkyrylldekanenko[0m ([33mkyrylldekanenko-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Epoch,Training Loss,Validation Loss
1,1.1816,0.822281
2,0.7877,0.590535
3,0.6368,0.479729
4,0.5458,0.389069
5,0.4756,0.352491
6,0.4485,0.338136
7,0.429,0.329259
8,0.4057,0.323506
9,0.393,0.315223
10,0.3809,0.310528


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


KeyboardInterrupt: 

### Inference test

In [16]:
FastLanguageModel.for_inference(model)

input = {"input":"You are a name and surname extractor. Your task is to identify and extract the names and surnames of characters from the given text.\n### Input:\nIn the quaint town of Eldridge, Sarah Quinn and Lucas Bennett stood at the park, deep in conversation. The autumn leaves crunched underfoot as they walked along the winding path. \n\n\"I can't believe we’re finally doing this,\" Lucas said, a nervous excitement in his voice.\n\nSarah smiled, brushing a loose strand of hair behind her ear. \"I know! After months of planning, it feels surreal.\"\n\nThey stopped by the old oak tree, its branches stretching wide like welcoming arms. “Have you thought about what you’ll say?” Sarah asked, her blue eyes sparkling with mischief.\n\n“Just that I appreciate everything he’s done for us,” Lucas replied, kicking a pebble out of their way.\n\nAs they watched the sun dip below the horizon, Jack Caldwell approached, his camera slung across his shoulder. “The moment’s finally here, huh?” he grinned, ready to capture the proposal.\n\nSarah squeezed Lucas’s hand, a flutter of anticipation in her stomach as they prepared for a new chapter in their lives. Tonight, under the stars, they would begin a story of love that had been years in the making.\n\n### Instructions:\n1. Extract characters' names and surnames clearly.\n2. Ensure that the JSON file contains a list of unique name-surname pairs (no duplicates).\n\n### Format Instructions:\n1. Do NOT include headers, comments, or any additional text—only generate the JSON file.\n2. Strictly adhere to the following JSON format:\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\nthe object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{\"$defs\": {\"FullName\": {\"properties\": {\"name\": {\"description\": \"Name of the character\", \"title\": \"Name\", \"type\": \"string\"}, \"surname\": {\"description\": \"Surname of the character\", \"title\": \"Surname\", \"type\": \"string\"}}, \"required\": [\"name\", \"surname\"], \"title\": \"FullName\", \"type\": \"object\"}}, \"properties\": {\"fullname_list\": {\"description\": \"List of full names for every character\", \"items\": {\"$ref\": \"#/$defs/FullName\"}, \"title\": \"Fullname List\", \"type\": \"array\"}}, \"required\": [\"fullname_list\"]}\n```"}
input = generate_prompt(input, train=False)
print(input)

input_ids = tokenizer(input, padding=True, return_tensors = "pt")["input_ids"].to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(input_ids, streamer = text_streamer, max_new_tokens = 256, pad_token_id = tokenizer.eos_token_id)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a name and surname extractor. Your task is to identify and extract the names and surnames of characters from the given text.
### Input:
In the quaint town of Eldridge, Sarah Quinn and Lucas Bennett stood at the park, deep in conversation. The autumn leaves crunched underfoot as they walked along the winding path. 

"I can't believe we’re finally doing this," Lucas said, a nervous excitement in his voice.

Sarah smiled, brushing a loose strand of hair behind her ear. "I know! After months of planning, it feels surreal."

They stopped by the old oak tree, its branches stretching wide like welcoming arms. “Have you thought about what you’ll say?” Sarah asked, her blue eyes sparkling with mischief.

“Just that I appreciate everything he’s done for us,” Lucas replied, kicking a pebble out of their way.

As they watched the sun dip below the horizon, Jack Caldwell approached, his camera slung across his shoulder. “The moment

### Saving finetuned model

In [17]:
model.save_pretrained("Llama1B_FineTuned")
tokenizer.save_pretrained("Llama1B_FineTuned")

('lora_model_json/tokenizer_config.json',
 'lora_model_json/special_tokens_map.json',
 'lora_model_json/tokenizer.json')