In [None]:
!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu128
!pip install unsloth
!pip install transformers==4.56.2
!pip install --no-deps trl==0.22.2


In [None]:
# Load the "unsloth/tinyllama-bnb-4bit" checkpoint and its tokenizer with 4-bit loading enabled.
# NOTE(review): in this notebook FastLanguageModel is only imported in a later cell, so this
# cell fails with NameError on a fresh top-to-bottom run — confirm whether this early cell is
# meant to be executed or is just an illustrative excerpt of the full pipeline further down.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/tinyllama-bnb-4bit",
    max_seq_length=4096,
    dtype=None,
    load_in_4bit=True,
)


In [None]:
# Wrap the loaded model with LoRA adapters (rank 32, alpha 32, no dropout, no bias terms).
# NOTE(review): `target_modules=[...]` is a list holding the Ellipsis literal, i.e. a
# placeholder — the concrete projection names appear in the full LoRA cell later in the notebook.
model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    target_modules=[...],
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing=False,
)


In [None]:
# NOTE(review): the template here is the literal three-character string "..." (a placeholder);
# the real template with its three {} slots is defined in the dataset-preparation cell below.
alpaca_prompt = """..."""
# End-of-sequence marker read from the tokenizer.
EOS_TOKEN = tokenizer.eos_token


In [None]:
# Build the supervised fine-tuning trainer.
# The tokenizer is passed as `processing_class` and `packing` lives in SFTConfig: these are
# the locations documented by TRL for the pinned release, so the call does not depend on any
# backward-compatibility patching of SFTTrainer's keyword arguments.
# NOTE(review): `dataset`, SFTTrainer and SFTConfig are only defined/imported in later cells
# of this notebook — confirm whether this early cell is meant to be executed.
trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    train_dataset=dataset,
    args=SFTConfig(
        packing=True,
        # 2 sequences per device x 4 accumulation steps = 8 sequences per optimizer step per device.
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        learning_rate=2e-5,
        optim="adamw_8bit",
    ),
)


In [None]:
# Display the peak amount of GPU memory (in bytes) reserved by PyTorch's caching allocator so far.
# NOTE(review): `torch` is only imported in a later cell of this notebook.
torch.cuda.max_memory_reserved()


In [None]:
# Switch the model into Unsloth's inference mode before calling generate().
FastLanguageModel.for_inference(model)


In [None]:
# Write the model and tokenizer files into the local "lora_model" directory.
# Presumably only the LoRA adapter weights are stored, since `model` is the PEFT-wrapped
# model — TODO confirm.
model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")


In [None]:
# Reload from the "lora_model" directory written by the save cell.
# NOTE(review): the return value is not assigned, so the loaded objects are only echoed as
# the cell output and are not available to later cells — confirm this is intended.
FastLanguageModel.from_pretrained("lora_model")


In [None]:
# Export with save_method="merged_16bit".
# NOTE(review): the first positional argument is the Ellipsis literal (a placeholder) and
# must be replaced with real arguments before this cell can run successfully.
model.save_pretrained_merged(..., save_method="merged_16bit")


In [None]:
# Export to GGUF with quantization_method="q4_k_m".
# NOTE(review): the first positional argument is the Ellipsis literal (a placeholder) and
# must be replaced with real arguments before this cell can run successfully.
model.save_pretrained_gguf(..., quantization_method="q4_k_m")


In [None]:
# Move the model to the CUDA device.
# NOTE(review): the model is loaded with load_in_4bit=True; whether `.to()` is permitted on a
# 4-bit quantized model depends on the installed library versions — verify before relying on it.
model.to("cuda")


In [None]:
# NOTE(review): this only binds a notebook-level variable; no visible cell reads `seed`
# (the LoRA cell passes the literal random_state=3407) — confirm whether it should be
# wired into the training configuration.
seed=3407


In [None]:
# NOTE(review): these are plain notebook-level variables; the visible trainer cells pass the
# literal num_train_epochs=1 and never read either name — confirm whether a step-limited run
# was intended.
max_steps = 100
num_train_epochs = None


In [None]:
# =========================
# 1. Imports & Config
# =========================
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer, SFTConfig
import torch

# Settings forwarded to FastLanguageModel.from_pretrained in the model-loading cell.
# Maximum sequence length handed to the loader.
max_seq_length = 4096
# Passed through unchanged as the loader's `dtype` argument; presumably None lets the
# library choose the precision — TODO confirm.
dtype = None
# Request 4-bit loading of the model weights.
load_in_4bit = True


In [None]:
# =========================
# 2. Load Model (4-bit)
# =========================
# Load the "unsloth/tinyllama-bnb-4bit" checkpoint together with its tokenizer, using the
# max_seq_length / dtype / load_in_4bit values from the config cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/tinyllama-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)


In [None]:
# =========================
# 3. Apply LoRA
# =========================
# Replace `model` with its LoRA-wrapped version.
# Adapters are attached to the four attention projections (q/k/v/o) and the three MLP
# projections (gate/up/down), with rank 32 and alpha 32, no dropout and no bias terms.
model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    target_modules=[
        "q_proj","k_proj","v_proj","o_proj",
        "gate_proj","up_proj","down_proj"
    ],
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    # Gradient checkpointing is explicitly turned off.
    use_gradient_checkpointing=False,
    # Fixed random_state so repeated runs use the same value.
    random_state=3407,
)


In [None]:
# =========================
# 4. Dataset Preparation
# =========================
# Prompt template with three positional {} slots, filled in order with the instruction,
# the input and the response text.
alpaca_prompt = """Below is an instruction...

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence marker read from the tokenizer; format_data appends it to every example.
EOS_TOKEN = tokenizer.eos_token

def format_data(examples):
    """Render a batch of records into prompt strings for training.

    Args:
        examples: Mapping with "instruction", "input" and "output" entries, each an
            iterable of strings aligned by position. Rows are paired with ``zip``,
            so iteration stops at the shortest of the three.

    Returns:
        A dict with a single "text" key holding one string per row: the
        ``alpaca_prompt`` template filled with (instruction, input, output),
        followed by ``EOS_TOKEN``.
    """
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    rendered = [
        alpaca_prompt.format(instruction, context, response) + EOS_TOKEN
        for instruction, context, response in rows
    ]
    return {"text": rendered}

# Fetch the train split of "yahma/alpaca-cleaned", then run format_data over it in batches
# so every row gains the "text" field that format_data returns.
dataset = load_dataset("yahma/alpaca-cleaned", split="train")
dataset = dataset.map(format_data, batched=True)


In [None]:
# =========================
# 5. Training
# =========================
# Build the supervised fine-tuning trainer and run it.
# The tokenizer is passed as `processing_class`, and `dataset_text_field` / `packing` are
# set on SFTConfig: these are the locations documented by TRL for the pinned release, so the
# call does not depend on any backward-compatibility patching of SFTTrainer's keyword
# arguments.
trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    train_dataset=dataset,
    args=SFTConfig(
        # Column produced by format_data that holds the fully rendered prompt.
        dataset_text_field="text",
        packing=True,
        # 2 sequences per device x 4 accumulation steps = 8 sequences per optimizer step per device.
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        learning_rate=2e-5,
        optim="adamw_8bit",
        output_dir="outputs",
        # Disable reporting to external experiment trackers.
        report_to="none",
    ),
)

trainer.train()


In [None]:
# =========================
# 6. Inference
# =========================
# Switch the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Tokenize one prompt and move the tensors to the GPU. The third template slot (the
# response) is left empty so the model is the one to fill it in.
inputs = tokenizer(
    alpaca_prompt.format(
        "Continue the Fibonacci sequence",
        "1, 1, 2, 3, 5, 8",
        ""
    ),
    return_tensors="pt"
).to("cuda")

# Generate at most 64 new tokens, then decode the first returned sequence without special tokens.
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


In [None]:
# =========================
# 7. Save LoRA
# =========================
# Write the model and tokenizer files into the local "lora_model" directory.
# Presumably only the LoRA adapter weights are stored, since `model` is the PEFT-wrapped
# model — TODO confirm.
model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")