# Fine-tune unsloth/Llama-3.2-1B-Instruct on data/trn.json (instruction: DESCRIBE ABOUT THE PRODUCT.)

This notebook reads JSON Lines data (one JSON object per line, despite the `.json` extension) from `data/trn.json` and builds an instruction-tuning dataset with:
- instruction: `"DESCRIBE ABOUT THE PRODUCT."`
- input: `title`
- output: `content`

and performs LoRA fine-tuning of `unsloth/Llama-3.2-1B-Instruct` using TRL + PEFT.

Notes:
- The notebook authenticates to the Hugging Face Hub with an access token; supply your own token and never commit it together with the notebook.
- Adjust training hyperparameters (batch sizes, steps) based on your hardware.


In [None]:
# If running in an isolated environment, install dependencies.
%pip -q install --upgrade "unsloth>=2024.08.08" "transformers>=4.43.3" "datasets>=2.20.0" "accelerate>=0.33.0" "peft>=0.11.1" "trl>=0.9.4" "sentencepiece>=0.2.0" "huggingface_hub>=0.24.6" "triton>=2.3.1"


In [None]:
# Load and prepare dataset from data/trn.json (JSONL)
import json, os, random
from datasets import Dataset, DatasetDict

data_path = 'data/trn.json'
assert os.path.exists(data_path), f'File not found: {data_path}'

# Three parallel columns, one entry per kept record.
instructions, inputs, outputs = [], [], []
with open(data_path, 'r', encoding='utf-8') as handle:
    for raw_line in handle:
        stripped = raw_line.strip()
        if stripped:
            record = json.loads(stripped)
            # Missing keys and explicit nulls are both normalised to ''.
            title = record.get('title', '') or ''
            content = record.get('content', '') or ''
            # A record without content has no training target, so it is
            # dropped; this also covers records where title is empty too.
            if content:
                instructions.append('DESCRIBE ABOUT THE PRODUCT.')
                inputs.append(title)
                outputs.append(content)

print(f'Total records loaded: {len(outputs)}')
# Assemble the columns into a Dataset and shuffle deterministically.
raw_ds = Dataset.from_dict(
    {'instruction': instructions, 'input': inputs, 'output': outputs}
).shuffle(seed=42)

# Hold out 5% for validation, unless the dataset is too small to split, in
# which case everything is used for training and the test split is empty.
if len(raw_ds) <= 20:
    splits = {'train': raw_ds, 'test': raw_ds.select(range(0))}
else:
    splits = raw_ds.train_test_split(test_size=0.05, seed=42)

# Both branches produce a dict-like mapping, so wrap unconditionally.
ds = DatasetDict(splits)
ds


In [None]:
# Build chat-formatted texts for Model
from typing import List, Dict

def make_chat(sample: Dict) -> List[Dict[str, str]]:
    """Turn one dataset record into a two-turn chat conversation.

    Args:
        sample: Mapping with the keys 'instruction', 'input' and 'output'.

    Returns:
        A list with a user message (instruction followed by a "Title:" line,
        with surrounding whitespace stripped) and an assistant message holding
        the record's output unchanged.
    """
    prompt = f"{sample['instruction']}\nTitle: {sample['input']}".strip()
    user_turn = {'role': 'user', 'content': prompt}
    assistant_turn = {'role': 'assistant', 'content': sample['output']}
    return [user_turn, assistant_turn]

def format_sample(sample: Dict, tokenizer) -> str:
    """Render one dataset record as a single chat-formatted training string.

    Args:
        sample: Record accepted by `make_chat`.
        tokenizer: Object exposing `apply_chat_template`.

    Returns:
        Whatever `apply_chat_template` returns for the conversation when
        asked for untokenized text.
    """
    # add_generation_prompt=False: the conversation already ends with the
    # assistant turn, so no trailing assistant header is requested.
    return tokenizer.apply_chat_template(
        make_chat(sample),
        tokenize=False,
        add_generation_prompt=False,
    )

def to_training_texts(dataset, tokenizer):
    """Format every record of `dataset` with `format_sample`.

    Args:
        dataset: Iterable of records.
        tokenizer: Passed through to `format_sample` for each record.

    Returns:
        A list with one formatted entry per record, in iteration order.
    """
    texts = []
    for record in dataset:
        texts.append(format_sample(record, tokenizer))
    return texts


In [None]:
# Load model and tokenizer
import os

import torch
from unsloth import FastLanguageModel

# Read the Hugging Face token from the HF_TOKEN environment variable instead of
# embedding it in the notebook: a literal token is exposed to everyone who can
# read the file. The token that was previously hardcoded here must be treated
# as leaked and revoked.
# An unset or empty variable becomes None and is passed on as `token=None`.
HF_TOKEN = os.environ.get('HF_TOKEN') or None

model_id = 'unsloth/Llama-3.2-1B-Instruct'

# Load the base model 4-bit quantized; dtype=None leaves the compute dtype
# choice to Unsloth.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
    token=HF_TOKEN,
)
model.config.use_cache = False  # important for training
# Attach LoRA adapters so the 4-bit quantized model can be fine-tuned.
# Adapters are placed on the attention projections and the MLP projections.
lora_target_modules = [
    'q_proj', 'k_proj', 'v_proj', 'o_proj',
    'gate_proj', 'up_proj', 'down_proj',
]
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=lora_target_modules,
)
print('Model and tokenizer loaded')


In [None]:
# Prepare training dataset with tokenization
max_length = 1024

def tokenize_function(examples):
    """Tokenize a batch of instruction records for causal-LM fine-tuning.

    Args:
        examples: Batched mapping with parallel 'instruction', 'input' and
            'output' columns (as supplied by `Dataset.map(batched=True)`).

    Returns:
        The tokenizer output for the chat-formatted texts, truncated to
        `max_length` and unpadded, with an added 'labels' entry that is a
        per-row copy of 'input_ids'.
    """
    # Reuse the shared formatter so training text is built in exactly one place.
    texts = [
        format_sample({'instruction': instr, 'input': inp, 'output': out}, tokenizer)
        for instr, inp, out in zip(
            examples['instruction'], examples['input'], examples['output']
        )
    ]
    tok = tokenizer(
        texts,
        # The chat template has already rendered the special tokens into the
        # text; letting the tokenizer add them again would duplicate BOS.
        add_special_tokens=False,
        truncation=True,
        max_length=max_length,
        padding=False,
        return_tensors=None,
    )
    # Labels equal input_ids for causal LM training (prompt tokens are not
    # masked). Copy each row so labels never alias the input_ids lists.
    tok['labels'] = [list(ids) for ids in tok['input_ids']]
    return tok

tokenized = ds.map(tokenize_function, batched=True, remove_columns=ds['train'].column_names)
tokenized


In [None]:
# Configure training arguments and the SFT trainer (LoRA adapters were attached in the model-loading cell)
from trl import SFTTrainer
from transformers import TrainingArguments
import os

# Checkpoints and the final adapter are written to this directory.
output_dir = 'outputs/llama-3.2-1b-lora'
os.makedirs(output_dir, exist_ok=True)

# Batch geometry: sequences per device and gradient-accumulation steps.
train_batch_size, gradient_accumulation = 16, 2

# Optimisation schedule.
learning_rate = 3e-5
warmup_steps = 10
num_epochs = 3
max_steps = 200

# Logging and checkpoint cadence, in steps.
logging_steps, save_steps = 1, 200

def has_test(ds):
    """Report whether `ds` has a non-empty 'test' split.

    Any error while looking up or measuring the split (missing key,
    unsubscriptable object, no length) is treated as "no test split".
    """
    try:
        n_eval = len(ds['test'])
    except Exception:
        return False
    return n_eval > 0

# Pick the mixed-precision mode from the hardware instead of hard-coding bf16:
# the model is loaded with dtype=None, so forcing bf16 here fails on GPUs
# without bfloat16 support. Fall back to fp16 on such GPUs.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
use_fp16 = torch.cuda.is_available() and not use_bf16

training_args = TrainingArguments(
    per_device_train_batch_size=train_batch_size,
    gradient_accumulation_steps=gradient_accumulation,
    # max_steps (when positive) takes precedence over num_train_epochs, so the
    # run length is governed by max_steps; num_train_epochs is kept only as the
    # value that would apply if max_steps were disabled.
    num_train_epochs=num_epochs,
    max_steps=max_steps,
    learning_rate=learning_rate,
    warmup_steps=warmup_steps,
    lr_scheduler_type='cosine',
    logging_steps=logging_steps,
    save_steps=save_steps,
    fp16=use_fp16,
    bf16=use_bf16,
    optim='paged_adamw_8bit',
    output_dir=output_dir,
    seed=42,
)

# Only hand over an eval split when one actually exists.
# NOTE(review): training_args configures no evaluation strategy, so the eval
# split is presumably never evaluated during training — confirm.
eval_split = tokenized['test'] if has_test(tokenized) else None

# NOTE(review): the datasets are already tokenized; confirm that the installed
# TRL version still applies packing to pre-tokenized input.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=tokenized['train'],
    eval_dataset=eval_split,
    max_seq_length=max_length,
    packing=True,  # pack multiple samples per sequence to utilize context
)
trainer.model.print_trainable_parameters()
print('Trainer ready')


In [15]:
# Train
# Run the training loop configured on `trainer`; the returned result object is
# kept in `train_result` for later inspection.
train_result = trainer.train()
# Persist the trainer state, then the model weights, into `output_dir`.
trainer.save_state()
trainer.save_model(output_dir)  # saves adapters if PEFT is used
print('Training complete. Artifacts saved to', output_dir)


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,423,782 | Num Epochs = 1 | Total steps = 200
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 2 x 1) = 32
 "-____-"     Trainable parameters = 11,272,192 of 1,247,086,592 (0.90% trained)


Step,Training Loss
1,2.4535
2,2.3482
3,2.4832
4,2.3112
5,2.2731
6,2.4526
7,2.4146
8,2.467
9,2.4197
10,2.3448


Training complete. Artifacts saved to outputs/llama-3.2-1b-lora


In [16]:
# Inference: load base + adapters and generate for a sample title
from peft import PeftModel
from transformers import pipeline

# Reload the base model with the same settings used for training. The tokenizer
# returned here is discarded; generation below reuses the `tokenizer` created
# in the model-loading cell.
base_model, _ = FastLanguageModel.from_pretrained(
    model_name = model_id,
    max_seq_length = 2048,
    dtype=None,
    load_in_4bit=True,
    token=HF_TOKEN,
)

# The cache was switched off for training; turn it back on for generation.
base_model.config.use_cache = True
# Load the adapter weights saved in `output_dir` on top of the base model and
# switch to evaluation mode.
adapted = PeftModel.from_pretrained(base_model, output_dir)
adapted.eval()

def generate_description(title: str, max_new_tokens: int = 128):
    """Generate a product description for `title` with the adapted model.

    Args:
        title: Product title inserted into the same prompt layout used for
            training.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded completion only (prompt excluded), with special tokens
        removed and surrounding whitespace stripped. Sampling is enabled, so
        repeated calls may return different text.
    """
    # Mirror make_chat(): the training prompt is stripped, so strip here too.
    user_content = f'DESCRIBE ABOUT THE PRODUCT.\nTitle: {title}'.strip()
    messages = [{'role': 'user', 'content': user_content}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # The chat template already rendered the special tokens into `prompt`;
    # adding them again would duplicate BOS.
    inputs = tokenizer(prompt, return_tensors='pt', add_special_tokens=False).to(adapted.device)
    prompt_len = inputs['input_ids'].shape[-1]
    with torch.no_grad():
        out = adapted.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            top_p=0.9,
            temperature=0.7,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated ids. Splitting the full decoded text on
    # the word 'assistant' would cut off any completion that contains it.
    completion_ids = out[0][prompt_len:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

# Smoke test: generate and print a description for one sample title.
print(generate_description('Girls Ballet Tutu Neon Pink'))


==((====))==  Unsloth 2025.9.11: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    NVIDIA GeForce RTX 3060. Num GPUs = 1. Max memory: 11.614 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.6. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
The girls love tutu!  These adorable tutus are just what you need for their next ballet class. The girls will love wearing these fun, neon tutus to their next ballet class.


## Tips
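- If you encounter memory issues, lower `max_length`, or reduce `per_device_train_batch_size` and raise `gradient_accumulation_steps` to keep the same effective batch size (4-bit quantization is already enabled via `load_in_4bit=True`).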
- You can push the adapter to the Hub by calling `trainer.push_to_hub()` with a repo name and using your token.
- The dataset includes many records with empty `content`; this notebook skips them to ensure non-empty targets.
